Initial pull of code from r6xx-r7xx-support branch
author Richard Li <RichardZ.Li@amd.com>
Tue, 21 Apr 2009 16:54:20 +0000 (12:54 -0400)
committer Alex Deucher <alexdeucher@gmail.com>
Tue, 21 Apr 2009 16:54:20 +0000 (12:54 -0400)
Not functional yet.

39 files changed:
src/mesa/drivers/dri/r600/Makefile
src/mesa/drivers/dri/r600/defaultendian.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r600_cmdbuf.c
src/mesa/drivers/dri/r600/r600_cmdbuf.h
src/mesa/drivers/dri/r600/r600_context.c
src/mesa/drivers/dri/r600/r600_context.h
src/mesa/drivers/dri/r600/r600_emit.c
src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r600_reg_r6xx.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r600_reg_r7xx.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r600_swtcl.c
src/mesa/drivers/dri/r600/r700_assembler.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_assembler.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_chip.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_chip.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_chipoffset.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_clear.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_clear.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_debug.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_debug.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_fragprog.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_fragprog.h
src/mesa/drivers/dri/r600/r700_ioctl.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_ioctl.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_oglprog.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_oglprog.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_render.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_shader.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_shader.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_shaderinst.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_shaderinst.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_state.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_state.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_tex.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_tex.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_vertprog.c [new file with mode: 0644]
src/mesa/drivers/dri/r600/r700_vertprog.h [new file with mode: 0644]
src/mesa/drivers/dri/r600/sq_micro_reg.h [new file with mode: 0644]
src/mesa/drivers/dri/radeon/radeon_screen.c

index 528396df270f457684f1f3546d2f12734d8f5af9..00c56c03cdaf8b5aea36983365aa5f6051f1d18f 100644 (file)
@@ -37,23 +37,23 @@ RADEON_COMMON_SOURCES = \
 DRIVER_SOURCES = \
                 radeon_screen.c \
                 r600_context.c \
-                r600_ioctl.c \
                 r600_cmdbuf.c \
-                r600_state.c \
-                r600_render.c \
-                r600_tex.c \
-                r600_texstate.c \
                 radeon_program.c \
                 radeon_program_alu.c \
                 radeon_program_pair.c \
                 radeon_nqssadce.c \
-                r600_vertprog.c \
-                r600_fragprog.c \
-                r600_fragprog_swizzle.c \
-                r600_fragprog_emit.c \
-                r600_shader.c \
-                r600_emit.c \
-                r600_swtcl.c \
+                r600_emit.c       \
+                r600_swtcl.c      \
+                r700_assembler.c  \
+                r700_fragprog.c \
+                r700_vertprog.c \
+                r700_shader.c \
+                r700_ioctl.c \
+                r700_oglprog.c \
+                r700_chip.c     \
+                r700_render.c   \
+                r700_tex.c      \
+                r700_debug.c    \
                 $(RADEON_COMMON_SOURCES) \
                 $(EGL_SOURCES)
 
diff --git a/src/mesa/drivers/dri/r600/defaultendian.h b/src/mesa/drivers/dri/r600/defaultendian.h
new file mode 100644 (file)
index 0000000..32caf32
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _DEFINEENDIAN_H_
+#define _DEFINEENDIAN_H_
+
+//We have to choose a reg bits orientation if there is no compile flag for it.
+#if defined(LITTLEENDIAN_CPU)
+#elif defined(BIGENDIAN_CPU)
+#else
+#define LITTLEENDIAN_CPU
+#endif
+
+#endif //_DEFINEENDIAN_H_
index e379984417e7bc1b0862d729e55c96d8a99087c6..bc14f9a75554c2aeb445c34fca41f5c87409f0a9 100644 (file)
@@ -45,493 +45,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_drm.h"
 
 #include "r600_context.h"
-#include "r600_ioctl.h"
 #include "radeon_reg.h"
-#include "r600_reg.h"
 #include "r600_cmdbuf.h"
 #include "r600_emit.h"
 #include "radeon_bocs_wrapper.h"
 #include "radeon_mipmap_tree.h"
-#include "r600_state.h"
 #include "radeon_reg.h"
 
-#define R600_VAP_PVS_UPLOAD_ADDRESS 0x2200
-#   define RADEON_ONE_REG_WR        (1 << 15)
-
-/** # of dwords reserved for additional instructions that may need to be written
- * during flushing.
- */
-#define SPACE_FOR_FLUSHING     4
-
-static unsigned packet0_count(r600ContextPtr r600, uint32_t *pkt)
-{
-    if (r600->radeon.radeonScreen->kernel_mm) {
-        return ((((*pkt) >> 16) & 0x3FFF) + 1);
-    } else {
-        drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
-        return t->packet0.count;
-    }
-    return 0;
-}
-
-#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
-
-void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
-{
-       r600ContextPtr r600 = R600_CONTEXT(ctx);
-       BATCH_LOCALS(&r600->radeon);
-       drm_r300_cmd_header_t cmd;
-       uint32_t addr, ndw, i;
-       
-       if (!r600->radeon.radeonScreen->kernel_mm) {
-               uint32_t dwords;
-               dwords = (*atom->check) (ctx, atom);
-               BEGIN_BATCH_NO_AUTOSTATE(dwords);
-               OUT_BATCH_TABLE(atom->cmd, dwords);
-               END_BATCH();
-               return;
-       }
-       
-       cmd.u = atom->cmd[0];
-       addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
-       ndw = cmd.vpu.count * 4;
-       if (ndw) {
-
-               if (r600->vap_flush_needed) {
-                       BEGIN_BATCH_NO_AUTOSTATE(15 + ndw);
-
-                       /* flush processing vertices */
-                       OUT_BATCH_REGVAL(R600_SC_SCREENDOOR, 0);
-                       OUT_BATCH_REGVAL(R600_RB3D_DSTCACHE_CTLSTAT, R600_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-                       OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
-                       OUT_BATCH_REGVAL(R600_SC_SCREENDOOR, 0xffffff);
-                       OUT_BATCH_REGVAL(R600_VAP_PVS_STATE_FLUSH_REG, 0);
-                       r600->vap_flush_needed = GL_FALSE;
-               } else {
-                       BEGIN_BATCH_NO_AUTOSTATE(5 + ndw);
-               }
-               OUT_BATCH_REGVAL(R600_VAP_PVS_UPLOAD_ADDRESS, addr);
-               OUT_BATCH(CP_PACKET0(R600_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
-               for (i = 0; i < ndw; i++) {
-                       OUT_BATCH(atom->cmd[i+1]);
-               }
-               OUT_BATCH_REGVAL(R600_VAP_PVS_STATE_FLUSH_REG, 0);
-               END_BATCH();
-       }
-}
-
-static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
-{
-       r600ContextPtr r600 = R600_CONTEXT(ctx);
-       BATCH_LOCALS(&r600->radeon);
-       int numtmus = packet0_count(r600, r600->hw.tex.offset.cmd);
-       int notexture = 0;
-
-       if (numtmus) {
-               int i;
-
-               for(i = 0; i < numtmus; ++i) {
-                   radeonTexObj *t = r600->hw.textures[i];
-               
-                   if (!t)
-                       notexture = 1;
-               }
-
-               if (r600->radeon.radeonScreen->kernel_mm && notexture) {
-                       return;
-               }
-               BEGIN_BATCH_NO_AUTOSTATE(4 * numtmus);
-               for(i = 0; i < numtmus; ++i) {
-                   radeonTexObj *t = r600->hw.textures[i];
-                   OUT_BATCH_REGSEQ(R600_TX_OFFSET_0 + (i * 4), 1);
-                   if (t && !t->image_override) {
-                           OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-                                           RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-                   } else if (!t) {
-                           OUT_BATCH(r600->radeon.radeonScreen->texOffset[0]);
-                   } else { /* override cases */
-                           if (t->bo) {
-                                   OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
-                                                   RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
-                           } else if (!r600->radeon.radeonScreen->kernel_mm) {
-                                   OUT_BATCH(t->override_offset);
-                           }
-                           else
-                               OUT_BATCH(r600->radeon.radeonScreen->texOffset[0]);
-                   }
-               }
-               END_BATCH();
-       }
-}
-
-static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
-{
-       r600ContextPtr r600 = R600_CONTEXT(ctx);
-       BATCH_LOCALS(&r600->radeon);
-       struct radeon_renderbuffer *rrb;
-       uint32_t cbpitch;
-       uint32_t offset = r600->radeon.state.color.draw_offset;
-
-       rrb = radeon_get_colorbuffer(&r600->radeon);
-       if (!rrb || !rrb->bo) {
-               fprintf(stderr, "no rrb\n");
-               return;
-       }
-
-       cbpitch = (rrb->pitch / rrb->cpp);
-       if (rrb->cpp == 4)
-               cbpitch |= R600_COLOR_FORMAT_ARGB8888;
-       else
-               cbpitch |= R600_COLOR_FORMAT_RGB565;
-
-       if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
-               cbpitch |= R600_COLOR_TILE_ENABLE;
-
-       BEGIN_BATCH_NO_AUTOSTATE(8);
-       OUT_BATCH_REGSEQ(R600_RB3D_COLOROFFSET0, 1);
-       OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-       OUT_BATCH_REGSEQ(R600_RB3D_COLORPITCH0, 1);
-       OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-       END_BATCH();
-    if (r600->radeon.radeonScreen->driScreen->dri2.enabled) {
-        if (r600->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
-            BEGIN_BATCH_NO_AUTOSTATE(3);
-            OUT_BATCH_REGSEQ(R600_SC_SCISSORS_TL, 2);
-            OUT_BATCH(0);
-            OUT_BATCH((rrb->width << R600_SCISSORS_X_SHIFT) |
-                    (rrb->height << R600_SCISSORS_Y_SHIFT));
-            END_BATCH();
-        } else {
-            BEGIN_BATCH_NO_AUTOSTATE(3);
-            OUT_BATCH_REGSEQ(R600_SC_SCISSORS_TL, 2);
-            OUT_BATCH((R600_SCISSORS_OFFSET << R600_SCISSORS_X_SHIFT) |
-                    (R600_SCISSORS_OFFSET << R600_SCISSORS_Y_SHIFT));
-            OUT_BATCH(((rrb->width + R600_SCISSORS_OFFSET) << R600_SCISSORS_X_SHIFT) |
-                    ((rrb->height + R600_SCISSORS_OFFSET) << R600_SCISSORS_Y_SHIFT));
-            END_BATCH();
-        }
-    }
-}
-
-static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
-{
-       r600ContextPtr r600 = R600_CONTEXT(ctx);
-       BATCH_LOCALS(&r600->radeon);
-       struct radeon_renderbuffer *rrb;
-       uint32_t zbpitch;
-
-       rrb = radeon_get_depthbuffer(&r600->radeon);
-       if (!rrb)
-               return;
-
-       zbpitch = (rrb->pitch / rrb->cpp);
-       if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-               zbpitch |= R600_DEPTHMACROTILE_ENABLE;
-       }
-       if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
-               zbpitch |= R600_DEPTHMICROTILE_TILED;
-       }
-       
-       BEGIN_BATCH_NO_AUTOSTATE(6);
-       OUT_BATCH_REGSEQ(R600_ZB_DEPTHOFFSET, 1);
-       OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-       OUT_BATCH_REGVAL(R600_ZB_DEPTHPITCH, zbpitch);
-       END_BATCH();
-}
-
-static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom)
-{
-       r600ContextPtr r600 = R600_CONTEXT(ctx);
-       BATCH_LOCALS(&r600->radeon);
-       struct radeon_renderbuffer *rrb;
-       uint32_t format = 0;
-
-       rrb = radeon_get_depthbuffer(&r600->radeon);
-       if (!rrb)
-         format = 0;
-       else {
-         if (rrb->cpp == 2)
-           format = R600_DEPTHFORMAT_16BIT_INT_Z;
-         else if (rrb->cpp == 4)
-           format = R600_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
-       }
-
-       OUT_BATCH(atom->cmd[0]);
-       atom->cmd[1] &= ~0xf;
-       atom->cmd[1] |= format;
-       OUT_BATCH(atom->cmd[1]);
-       OUT_BATCH(atom->cmd[2]);
-       OUT_BATCH(atom->cmd[3]);
-       OUT_BATCH(atom->cmd[4]);
-}
-
-static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
-{
-       return atom->cmd_size;
-}
-
-static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
-{
-       r600ContextPtr r600 = R600_CONTEXT(ctx);
-       int cnt;
-       if (atom->cmd[0] == CP_PACKET2) {
-               return 0;
-       }
-       cnt = packet0_count(r600, atom->cmd);
-       return cnt ? cnt + 1 : 0;
-}
-
-int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
-{
-       int cnt;
-
-       cnt = vpu_count(atom->cmd);
-       return cnt ? (cnt * 4) + 1 : 0;
-}
-
-#define ALLOC_STATE( ATOM, CHK, SZ, IDX )                              \
-   do {                                                                        \
-      r600->hw.ATOM.cmd_size = (SZ);                                   \
-      r600->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t));  \
-      r600->hw.ATOM.name = #ATOM;                                      \
-      r600->hw.ATOM.idx = (IDX);                                       \
-      r600->hw.ATOM.check = check_##CHK;                               \
-      r600->hw.ATOM.dirty = GL_FALSE;                                  \
-      r600->radeon.hw.max_state_size += (SZ);                                  \
-      insert_at_tail(&r600->radeon.hw.atomlist, &r600->hw.ATOM);               \
-   } while (0)
 /**
  * Allocate memory for the command buffer and initialize the state atom
  * list. Note that the initial hardware state is set by r600InitState().
  */
 void r600InitCmdBuf(r600ContextPtr r600)
 {
-       int mtu;
-       int i;
-
-       r600->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
-
-       mtu = r600->radeon.glCtx->Const.MaxTextureUnits;
-       if (RADEON_DEBUG & DEBUG_TEXTURE) {
-               fprintf(stderr, "Using %d maximum texture units..\n", mtu);
-       }
-
-       /* Setup the atom linked list */
-       make_empty_list(&r600->radeon.hw.atomlist);
-       r600->radeon.hw.atomlist.name = "atom-list";
-
-       /* Initialize state atoms */
-       ALLOC_STATE(vpt, always, R600_VPT_CMDSIZE, 0);
-       r600->hw.vpt.cmd[R600_VPT_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_SE_VPORT_XSCALE, 6);
-       ALLOC_STATE(vap_cntl, always, R600_VAP_CNTL_SIZE, 0);
-       r600->hw.vap_cntl.cmd[R600_VAP_CNTL_FLUSH] = cmdpacket0(r600->radeon.radeonScreen, R600_VAP_PVS_STATE_FLUSH_REG, 1);
-       r600->hw.vap_cntl.cmd[R600_VAP_CNTL_FLUSH_1] = 0;
-       r600->hw.vap_cntl.cmd[R600_VAP_CNTL_CMD] = cmdpacket0(r600->radeon.radeonScreen, R600_VAP_CNTL, 1);
-
-       ALLOC_STATE(vte, always, 3, 0);
-       r600->hw.vte.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_SE_VTE_CNTL, 2);
-       ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
-       r600->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_VAP_VF_MAX_VTX_INDX, 2);
-       ALLOC_STATE(vap_cntl_status, always, 2, 0);
-       r600->hw.vap_cntl_status.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_VAP_CNTL_STATUS, 1);
-       ALLOC_STATE(vir[0], variable, R600_VIR_CMDSIZE, 0);
-       r600->hw.vir[0].cmd[R600_VIR_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_VAP_PROG_STREAM_CNTL_0, 1);
-       ALLOC_STATE(vir[1], variable, R600_VIR_CMDSIZE, 1);
-       r600->hw.vir[1].cmd[R600_VIR_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_VAP_PROG_STREAM_CNTL_EXT_0, 1);
-       ALLOC_STATE(vic, always, R600_VIC_CMDSIZE, 0);
-       r600->hw.vic.cmd[R600_VIC_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_VAP_VTX_STATE_CNTL, 2);
-       ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
-       r600->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
-
-       ALLOC_STATE(vap_clip_cntl, always, 2, 0);
-       r600->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_VAP_CLIP_CNTL, 1);
-       ALLOC_STATE(vap_clip, always, 5, 0);
-       r600->hw.vap_clip.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_VAP_GB_VERT_CLIP_ADJ, 4);
-       ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
-       r600->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
-
-       ALLOC_STATE(vof, always, R600_VOF_CMDSIZE, 0);
-       r600->hw.vof.cmd[R600_VOF_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_VAP_OUTPUT_VTX_FMT_0, 2);
-
-       ALLOC_STATE(pvs, always, R600_PVS_CMDSIZE, 0);
-       r600->hw.pvs.cmd[R600_PVS_CMD_0] =
-               cmdpacket0(r600->radeon.radeonScreen, R600_VAP_PVS_CODE_CNTL_0, 3);
-
-       ALLOC_STATE(gb_enable, always, 2, 0);
-       r600->hw.gb_enable.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GB_ENABLE, 1);
-       ALLOC_STATE(gb_misc, always, R600_GB_MISC_CMDSIZE, 0);
-       r600->hw.gb_misc.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GB_MSPOS0, 5);
-       ALLOC_STATE(txe, always, R600_TXE_CMDSIZE, 0);
-       r600->hw.txe.cmd[R600_TXE_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_TX_ENABLE, 1);
-       ALLOC_STATE(ga_point_s0, always, 5, 0);
-       r600->hw.ga_point_s0.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_POINT_S0, 4);
-       ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
-       r600->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_TRIANGLE_STIPPLE, 1);
-       ALLOC_STATE(ps, always, R600_PS_CMDSIZE, 0);
-       r600->hw.ps.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_POINT_SIZE, 1);
-       ALLOC_STATE(ga_point_minmax, always, 4, 0);
-       r600->hw.ga_point_minmax.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_POINT_MINMAX, 3);
-       ALLOC_STATE(lcntl, always, 2, 0);
-       r600->hw.lcntl.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_LINE_CNTL, 1);
-       ALLOC_STATE(ga_line_stipple, always, 4, 0);
-       r600->hw.ga_line_stipple.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_LINE_STIPPLE_VALUE, 3);
-       ALLOC_STATE(shade, always, 5, 0);
-       r600->hw.shade.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_ENHANCE, 4);
-       ALLOC_STATE(polygon_mode, always, 4, 0);
-       r600->hw.polygon_mode.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_POLY_MODE, 3);
-       ALLOC_STATE(fogp, always, 3, 0);
-       r600->hw.fogp.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_GA_FOG_SCALE, 2);
-       ALLOC_STATE(zbias_cntl, always, 2, 0);
-       r600->hw.zbias_cntl.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_SU_TEX_WRAP, 1);
-       ALLOC_STATE(zbs, always, R600_ZBS_CMDSIZE, 0);
-       r600->hw.zbs.cmd[R600_ZBS_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_SU_POLY_OFFSET_FRONT_SCALE, 4);
-       ALLOC_STATE(occlusion_cntl, always, 2, 0);
-       r600->hw.occlusion_cntl.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_SU_POLY_OFFSET_ENABLE, 1);
-       ALLOC_STATE(cul, always, R600_CUL_CMDSIZE, 0);
-       r600->hw.cul.cmd[R600_CUL_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_SU_CULL_MODE, 1);
-       ALLOC_STATE(su_depth_scale, always, 3, 0);
-       r600->hw.su_depth_scale.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_SU_DEPTH_SCALE, 2);
-       ALLOC_STATE(rc, always, R600_RC_CMDSIZE, 0);
-       r600->hw.rc.cmd[R600_RC_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_RS_COUNT, 2);
-
-       ALLOC_STATE(ri, always, R600_RI_CMDSIZE, 0);
-       r600->hw.ri.cmd[R600_RI_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_RS_IP_0, 8);
-       ALLOC_STATE(rr, variable, R600_RR_CMDSIZE, 0);
-       r600->hw.rr.cmd[R600_RR_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_RS_INST_0, 1);
-
-       ALLOC_STATE(sc_hyperz, always, 3, 0);
-       r600->hw.sc_hyperz.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_SC_HYPERZ, 2);
-       ALLOC_STATE(sc_screendoor, always, 2, 0);
-       r600->hw.sc_screendoor.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_SC_SCREENDOOR, 1);
-       ALLOC_STATE(us_out_fmt, always, 6, 0);
-       r600->hw.us_out_fmt.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_US_OUT_FMT, 5);
-
-       ALLOC_STATE(fp, always, R600_FP_CMDSIZE, 0);
-       r600->hw.fp.cmd[R600_FP_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_US_CONFIG, 3);
-       r600->hw.fp.cmd[R600_FP_CMD_1] = cmdpacket0(r600->radeon.radeonScreen, R600_US_CODE_ADDR_0, 4);
-
-       ALLOC_STATE(fpt, variable, R600_FPT_CMDSIZE, 0);
-       r600->hw.fpt.cmd[R600_FPT_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_US_TEX_INST_0, 0);
-
-       ALLOC_STATE(fpi[0], variable, R600_FPI_CMDSIZE, 0);
-       r600->hw.fpi[0].cmd[R600_FPI_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_US_ALU_RGB_INST_0, 1);
-       ALLOC_STATE(fpi[1], variable, R600_FPI_CMDSIZE, 1);
-       r600->hw.fpi[1].cmd[R600_FPI_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_US_ALU_RGB_ADDR_0, 1);
-       ALLOC_STATE(fpi[2], variable, R600_FPI_CMDSIZE, 2);
-       r600->hw.fpi[2].cmd[R600_FPI_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_US_ALU_ALPHA_INST_0, 1);
-       ALLOC_STATE(fpi[3], variable, R600_FPI_CMDSIZE, 3);
-       r600->hw.fpi[3].cmd[R600_FPI_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_US_ALU_ALPHA_ADDR_0, 1);
-       ALLOC_STATE(fpp, variable, R600_FPP_CMDSIZE, 0);
-       r600->hw.fpp.cmd[R600_FPP_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_PFS_PARAM_0_X, 0);
-
-       ALLOC_STATE(fogs, always, R600_FOGS_CMDSIZE, 0);
-       r600->hw.fogs.cmd[R600_FOGS_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_FG_FOG_BLEND, 1);
-       ALLOC_STATE(fogc, always, R600_FOGC_CMDSIZE, 0);
-       r600->hw.fogc.cmd[R600_FOGC_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_FG_FOG_COLOR_R, 3);
-       ALLOC_STATE(at, always, R600_AT_CMDSIZE, 0);
-       r600->hw.at.cmd[R600_AT_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_FG_ALPHA_FUNC, 2);
-       ALLOC_STATE(fg_depth_src, always, 2, 0);
-       r600->hw.fg_depth_src.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_FG_DEPTH_SRC, 1);
-       ALLOC_STATE(rb3d_cctl, always, 2, 0);
-       r600->hw.rb3d_cctl.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_RB3D_CCTL, 1);
-       ALLOC_STATE(bld, always, R600_BLD_CMDSIZE, 0);
-       r600->hw.bld.cmd[R600_BLD_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_RB3D_CBLEND, 2);
-       ALLOC_STATE(cmk, always, R600_CMK_CMDSIZE, 0);
-       r600->hw.cmk.cmd[R600_CMK_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
-
-       ALLOC_STATE(blend_color, always, 2, 0);
-       r600->hw.blend_color.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_RB3D_BLEND_COLOR, 1);
-
-       ALLOC_STATE(rop, always, 2, 0);
-       r600->hw.rop.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_RB3D_ROPCNTL, 1);
-       ALLOC_STATE(cb, always, R600_CB_CMDSIZE, 0);
-       r600->hw.cb.emit = &emit_cb_offset;
-       ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
-       r600->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_RB3D_DITHER_CTL, 9);
-       ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
-       r600->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_RB3D_AARESOLVE_CTL, 1);
-
-       ALLOC_STATE(zs, always, R600_ZS_CMDSIZE, 0);
-       r600->hw.zs.cmd[R600_ZS_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_ZB_CNTL, 3);
-
-       ALLOC_STATE(zstencil_format, always, 5, 0);
-       r600->hw.zstencil_format.cmd[0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_ZB_FORMAT, 4);
-       r600->hw.zstencil_format.emit = emit_zstencil_format;
-
-       ALLOC_STATE(zb, always, R600_ZB_CMDSIZE, 0);
-       r600->hw.zb.emit = emit_zb_offset;
-       ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
-       r600->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_ZB_DEPTHCLEARVALUE, 1);
-       ALLOC_STATE(unk4F30, always, 3, 0);
-       r600->hw.unk4F30.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, 0x4F30, 2);
-       ALLOC_STATE(zb_hiz_offset, always, 2, 0);
-       r600->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_ZB_HIZ_OFFSET, 1);
-       ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
-       r600->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r600->radeon.radeonScreen, R600_ZB_HIZ_PITCH, 1);
-
-       ALLOC_STATE(vpi, vpu, R600_VPI_CMDSIZE, 0);
-       r600->hw.vpi.cmd[0] =
-               cmdvpu(r600->radeon.radeonScreen, R600_PVS_CODE_START, 0);
-       r600->hw.vpi.emit = emit_vpu;
-
-       ALLOC_STATE(vpp, vpu, R600_VPP_CMDSIZE, 0);
-       r600->hw.vpp.cmd[0] =
-               cmdvpu(r600->radeon.radeonScreen, R600_PVS_CONST_START, 0);
-       r600->hw.vpp.emit = emit_vpu;
-
-       ALLOC_STATE(vps, vpu, R600_VPS_CMDSIZE, 0);
-       r600->hw.vps.cmd[0] =
-               cmdvpu(r600->radeon.radeonScreen, R600_POINT_VPORT_SCALE_OFFSET, 1);
-       r600->hw.vps.emit = emit_vpu;
-
-       for (i = 0; i < 6; i++) {
-               ALLOC_STATE(vpucp[i], vpu, R600_VPUCP_CMDSIZE, 0);
-               r600->hw.vpucp[i].cmd[0] =
-                       cmdvpu(r600->radeon.radeonScreen,
-                              R600_PVS_UCP_START + i, 1);
-               r600->hw.vpucp[i].emit = emit_vpu;
-       }
-
-       /* Textures */
-       ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
-       r600->hw.tex.filter.cmd[R600_TEX_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_TX_FILTER0_0, 0);
-
-       ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
-       r600->hw.tex.filter_1.cmd[R600_TEX_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_TX_FILTER1_0, 0);
-
-       ALLOC_STATE(tex.size, variable, mtu + 1, 0);
-       r600->hw.tex.size.cmd[R600_TEX_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_TX_SIZE_0, 0);
-
-       ALLOC_STATE(tex.format, variable, mtu + 1, 0);
-       r600->hw.tex.format.cmd[R600_TEX_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_TX_FORMAT_0, 0);
-
-       ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
-       r600->hw.tex.pitch.cmd[R600_TEX_CMD_0] = cmdpacket0(r600->radeon.radeonScreen, R600_TX_FORMAT2_0, 0);
-
-       ALLOC_STATE(tex.offset, variable, 1, 0);
-       r600->hw.tex.offset.cmd[R600_TEX_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_TX_OFFSET_0, 0);
-       r600->hw.tex.offset.emit = &emit_tex_offsets;
-
-       ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
-       r600->hw.tex.chroma_key.cmd[R600_TEX_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_TX_CHROMA_KEY_0, 0);
-
-       ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
-       r600->hw.tex.border_color.cmd[R600_TEX_CMD_0] =
-           cmdpacket0(r600->radeon.radeonScreen, R600_TX_BORDER_COLOR_0, 0);
-
-       r600->radeon.hw.is_dirty = GL_TRUE;
-       r600->radeon.hw.all_dirty = GL_TRUE;
-
        rcommonInitCmdBuf(&r600->radeon);
 }
index 6a921a23f66975b3c4573b9404bb7a1896212680..1bab9c5821332c07d02eb6802b050a4e9288a859 100644 (file)
@@ -40,11 +40,4 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 extern void r600InitCmdBuf(r600ContextPtr r600);
 
-void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom);
-int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom);
-
-void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom);
-int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom);
-int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom);
-
 #endif                         /* __R600_CMDBUF_H__ */
index 6754acba4daa0f930d23de1c8bf7ab2878c706a8..1b43f68ac2f4611b25531972f53666090a0d64d4 100644 (file)
@@ -60,13 +60,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_context.h"
 #include "radeon_span.h"
 #include "r600_cmdbuf.h"
-#include "r600_state.h"
-#include "r600_ioctl.h"
-#include "r600_tex.h"
 #include "r600_emit.h"
 #include "r600_swtcl.h"
 #include "radeon_bocs_wrapper.h"
 
+#include "r700_chip.h"
+#include "r700_state.h"
+#include "r700_ioctl.h"
+
 
 #include "vblank.h"
 #include "utils.h"
@@ -216,44 +217,12 @@ static void r600_get_lock(radeonContextPtr rmesa)
 static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
 {
     /* please flush pipe do all pending work */
-    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-                                  R600_SC_SCREENDOOR, 1));
-    radeon_cs_write_dword(cs, 0x0);
-    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-                                  R600_SC_SCREENDOOR, 1));
-    radeon_cs_write_dword(cs, 0x00FFFFFF);
-    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-                                  R600_SC_HYPERZ, 1));
-    radeon_cs_write_dword(cs, 0x0);
-    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-                                  R600_US_CONFIG, 1));
-    radeon_cs_write_dword(cs, 0x0);
-    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-                                  R600_ZB_CNTL, 1));
-    radeon_cs_write_dword(cs, 0x0);
-    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
-    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-                                  R600_RB3D_DSTCACHE_CTLSTAT, 1));
-    radeon_cs_write_dword(cs, R600_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-                                  R600_ZB_ZCACHE_CTLSTAT, 1));
-    radeon_cs_write_dword(cs, R600_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
-    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
-                               R300_WAIT_3D | R300_WAIT_3D_CLEAN));
+    /* to be enabled */
 }
 
 static void r600_vtbl_pre_emit_atoms(radeonContextPtr radeon)
 {
-       r600ContextPtr r600 = (r600ContextPtr)radeon;
-       BATCH_LOCALS(radeon);
-       
-       r600->vap_flush_needed = GL_TRUE;
-       
-       cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-       BEGIN_BATCH_NO_AUTOSTATE(2);
-       OUT_BATCH_REGVAL(R600_TX_INVALTAGS, R600_TX_FLUSH);
-       END_BATCH();
-       end_3d(radeon);
+       /* to be enabled */
 }
 
 static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
@@ -268,13 +237,68 @@ static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
 static void r600_init_vtbl(radeonContextPtr radeon)
 {
        radeon->vtbl.get_lock = r600_get_lock;
-       radeon->vtbl.update_viewport_offset = r600UpdateViewportOffset;
+       radeon->vtbl.update_viewport_offset = r700UpdateViewportOffset;
        radeon->vtbl.emit_cs_header = r600_vtbl_emit_cs_header;
        radeon->vtbl.swtcl_flush = r600_swtcl_flush;
        radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms;
        radeon->vtbl.fallback = r600_fallback;
 }
 
+/* Placeholder installed as chipobj.EmitShader in r600CreateContext(): empty body, to be enabled. */
+static void r600EmitShader(GLcontext * ctx, 
+                   struct r600_dma_region *rvb,
+                              GLvoid * data, 
+                   int sizeinDWORD) 
+{
+}
+/* Placeholder for chipobj.FreeDmaRegion (to be enabled); takes GLcontext * to match that member's type. */
+static void r600FreeDmaRegion(GLcontext * ctx, 
+                              struct r600_dma_region *region)
+{
+}
+/* Placeholder installed as chipobj.EmitVec in r600CreateContext(): empty body, to be enabled. */
+static void r600EmitVec(GLcontext * ctx, 
+                 struct r600_dma_region *rvb,
+                            GLvoid * data, 
+                 int size, 
+                 int stride, 
+                 int count)
+{
+}
+/* Placeholder installed as chipobj.ReleaseArrays in r600CreateContext(): empty body, to be enabled. */
+static void r600ReleaseArrays(GLcontext * ctx)
+{
+}
+/* Placeholder installed as chipobj.LoadMemSurf (to be enabled): ignores its arguments and returns GL_TRUE. */
+static GLboolean r600LoadMemSurf(context_t *context,
+                               GLuint     dst_offset, /* gpu addr */
+                               GLuint     dst_pitch_in_pixel,                               
+                               GLuint     src_width_in_pixel,
+                               GLuint     height,
+                               GLuint     byte_per_pixel,
+                               unsigned char* pSrc) /* source data */
+{
+    return GL_TRUE;
+}
+/* Placeholder for chipobj.AllocMemSurf (to be enabled): allocates nothing, leaves out-params untouched, returns GL_FALSE. */
+static GLboolean r600AllocMemSurf(context_t   *context,
+                           void       **ppmemBlock, void **ppheap,
+                           GLuint      *prefered_heap, /* Now used RADEON_LOCAL_TEX_HEAP, return actual heap used. */
+                           GLuint       totalSize)
+{
+    return GL_FALSE; /* explicit result: a non-void function must not fall off its end */
+}
+/* Placeholder for chipobj.FlushCmdBuffer (to be enabled): flushes nothing and
+ * returns 0. Takes GLcontext * so its type matches that member's declaration. */
+static int  r600FlushCmdBuffer(GLcontext * ctx)
+{
+    int ret = 0;
+    return ret;
+}
+/* Placeholder installed as chipobj.MemUse in r600CreateContext(): empty body, to be enabled. */
+static void r600MemUse(context_t *context, int id)
+{
+}
 
 /* Create the device specific rendering context.
  */
@@ -315,10 +339,18 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
         * (the texture functions are especially important)
         */
        _mesa_init_driver_functions(&functions);
-       r600InitIoctlFuncs(&functions);
-       r600InitStateFuncs(&functions);
-       r600InitTextureFuncs(&functions);
-       r600InitShaderFuncs(&functions);
+
+       r700InitChipObject(r600);  /* let the eag... */
+
+    (r600->chipobj.InitFuncs)(&functions);
+    r600->chipobj.EmitShader     = r600EmitShader;
+    r600->chipobj.FreeDmaRegion  = r600FreeDmaRegion;
+    r600->chipobj.EmitVec        = r600EmitVec;
+    r600->chipobj.ReleaseArrays  = r600ReleaseArrays;
+    r600->chipobj.LoadMemSurf    = r600LoadMemSurf;
+    r600->chipobj.AllocMemSurf   = r600AllocMemSurf;
+    r600->chipobj.FlushCmdBuffer = r600FlushCmdBuffer;
+    r600->chipobj.MemUse         = r600MemUse;
 
        if (!radeonInitContext(&r600->radeon, &functions,
                               glVisual, driContextPriv,
@@ -345,18 +377,20 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
        ctx->Const.MaxTextureMaxAnisotropy = 16.0;
        ctx->Const.MaxTextureLodBias = 16.0;
 
-       ctx->Const.MaxTextureLevels = 13;
-       ctx->Const.MaxTextureRectSize = 4096;
+       if (screen->chip_family >= CHIP_FAMILY_RV515) {
+           ctx->Const.MaxTextureLevels = 13;
+           ctx->Const.MaxTextureRectSize = 4096;
+       }
 
-       ctx->Const.MinPointSize = 1.0;
-       ctx->Const.MinPointSizeAA = 1.0;
-       ctx->Const.MaxPointSize = R600_POINTSIZE_MAX;
-       ctx->Const.MaxPointSizeAA = R600_POINTSIZE_MAX;
+       ctx->Const.MinPointSize   = 0x0001 / 8.0;
+       ctx->Const.MinPointSizeAA = 0x0001 / 8.0;
+       ctx->Const.MaxPointSize   = 0xffff / 8.0;
+       ctx->Const.MaxPointSizeAA = 0xffff / 8.0;
 
-       ctx->Const.MinLineWidth = 1.0;
-       ctx->Const.MinLineWidthAA = 1.0;
-       ctx->Const.MaxLineWidth = R600_LINESIZE_MAX;
-       ctx->Const.MaxLineWidthAA = R600_LINESIZE_MAX;
+       ctx->Const.MinLineWidth   = 0x0001 / 8.0;
+       ctx->Const.MinLineWidthAA = 0x0001 / 8.0;
+       ctx->Const.MaxLineWidth   = 0xffff / 8.0;
+       ctx->Const.MaxLineWidthAA = 0xffff / 8.0;
 
        /* Needs further modifications */
 #if 0
@@ -378,7 +412,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
        /* Install the customized pipeline:
         */
        _tnl_destroy_pipeline(ctx);
-       _tnl_install_pipeline(ctx, r600_pipeline);
+       _tnl_install_pipeline(ctx, (const struct tnl_pipeline_stage **)(r600->chipobj.stages));
 
        /* Try and keep materials and vertices separate:
         */
@@ -392,15 +426,17 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
        _tnl_allow_vertex_fog(ctx, GL_TRUE);
 
        /* currently bogus data */
-       ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
-       ctx->Const.VertexProgram.MaxNativeInstructions =
-               VSF_MAX_FRAGMENT_LENGTH / 4;
-       ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
-       ctx->Const.VertexProgram.MaxTemps = 32;
-       ctx->Const.VertexProgram.MaxNativeTemps =
-               /*VSF_MAX_FRAGMENT_TEMPS */ 32;
-       ctx->Const.VertexProgram.MaxNativeParameters = 256;     /* r420 */
-       ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+       if (screen->chip_flags & RADEON_CHIPSET_TCL) {
+               ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
+               ctx->Const.VertexProgram.MaxNativeInstructions =
+                 VSF_MAX_FRAGMENT_LENGTH / 4;
+               ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
+               ctx->Const.VertexProgram.MaxTemps = 32;
+               ctx->Const.VertexProgram.MaxNativeTemps =
+                 /*VSF_MAX_FRAGMENT_TEMPS */ 32;
+               ctx->Const.VertexProgram.MaxNativeParameters = 256;     /* r420 */
+               ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+       }
 
        ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS;
        ctx->Const.FragmentProgram.MaxNativeAttribs = 11;       /* copy i915... */
@@ -438,10 +474,13 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
                            "disable_lowimpact_fallback");
        radeon_fbo_init(&r600->radeon);
        radeonInitSpanFuncs( ctx );
+
        r600InitCmdBuf(r600);
+#if 0 /* to be enabled */
        r600InitState(r600);
        if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
                r600InitSwtcl(ctx);
+#endif
 
        TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline;
 
index 0decf5d6a937d157e5e964ed6ebbde08ba38b8a8..74e34e3684d0a0d1e786287d9446fb43039341f5 100644 (file)
@@ -52,6 +52,7 @@ struct r600_context;
 typedef struct r600_context r600ContextRec;
 typedef struct r600_context *r600ContextPtr;
 
+typedef struct r600_context context_t;
 
 #include "main/mm.h"
 
@@ -71,11 +72,6 @@ typedef struct r600_context *r600ContextPtr;
                } \
        }
 
-#include "r600_vertprog.h"
-#include "r700_fragprog.h"
-
-
-
 /************ DMA BUFFERS **************/
 
 /* The blit width for texture uploads
@@ -87,316 +83,6 @@ struct r600_texture_state {
        int tc_count;           /* number of incoming texture coordinates from VAP */
 };
 
-
-#define R600_VPT_CMD_0         0
-#define R600_VPT_XSCALE                1
-#define R600_VPT_XOFFSET       2
-#define R600_VPT_YSCALE                3
-#define R600_VPT_YOFFSET       4
-#define R600_VPT_ZSCALE                5
-#define R600_VPT_ZOFFSET       6
-#define R600_VPT_CMDSIZE       7
-
-#define R600_VIR_CMD_0         0       /* vir is variable size (at least 1) */
-#define R600_VIR_CNTL_0                1
-#define R600_VIR_CNTL_1                2
-#define R600_VIR_CNTL_2                3
-#define R600_VIR_CNTL_3                4
-#define R600_VIR_CNTL_4                5
-#define R600_VIR_CNTL_5                6
-#define R600_VIR_CNTL_6                7
-#define R600_VIR_CNTL_7                8
-#define R600_VIR_CMDSIZE       9
-
-#define R600_VIC_CMD_0         0
-#define R600_VIC_CNTL_0                1
-#define R600_VIC_CNTL_1                2
-#define R600_VIC_CMDSIZE       3
-
-#define R600_VOF_CMD_0         0
-#define R600_VOF_CNTL_0                1
-#define R600_VOF_CNTL_1                2
-#define R600_VOF_CMDSIZE       3
-
-#define R600_PVS_CMD_0         0
-#define R600_PVS_CNTL_1                1
-#define R600_PVS_CNTL_2                2
-#define R600_PVS_CNTL_3                3
-#define R600_PVS_CMDSIZE       4
-
-#define R600_GB_MISC_CMD_0             0
-#define R600_GB_MISC_MSPOS_0           1
-#define R600_GB_MISC_MSPOS_1           2
-#define R600_GB_MISC_TILE_CONFIG       3
-#define R600_GB_MISC_SELECT            4
-#define R600_GB_MISC_AA_CONFIG         5
-#define R600_GB_MISC_CMDSIZE           6
-
-#define R600_TXE_CMD_0         0
-#define R600_TXE_ENABLE                1
-#define R600_TXE_CMDSIZE       2
-
-#define R600_PS_CMD_0          0
-#define R600_PS_POINTSIZE      1
-#define R600_PS_CMDSIZE                2
-
-#define R600_ZBS_CMD_0         0
-#define R600_ZBS_T_FACTOR      1
-#define R600_ZBS_T_CONSTANT    2
-#define R600_ZBS_W_FACTOR      3
-#define R600_ZBS_W_CONSTANT    4
-#define R600_ZBS_CMDSIZE       5
-
-#define R600_CUL_CMD_0         0
-#define R600_CUL_CULL          1
-#define R600_CUL_CMDSIZE       2
-
-#define R600_RC_CMD_0          0
-#define R600_RC_CNTL_0         1
-#define R600_RC_CNTL_1         2
-#define R600_RC_CMDSIZE                3
-
-#define R600_RI_CMD_0          0
-#define R600_RI_INTERP_0       1
-#define R600_RI_INTERP_1       2
-#define R600_RI_INTERP_2       3
-#define R600_RI_INTERP_3       4
-#define R600_RI_INTERP_4       5
-#define R600_RI_INTERP_5       6
-#define R600_RI_INTERP_6       7
-#define R600_RI_INTERP_7       8
-#define R600_RI_CMDSIZE                9
-
-#define R500_RI_CMDSIZE               17
-
-#define R600_RR_CMD_0          0       /* rr is variable size (at least 1) */
-#define R600_RR_INST_0         1
-#define R600_RR_INST_1         2
-#define R600_RR_INST_2         3
-#define R600_RR_INST_3         4
-#define R600_RR_INST_4         5
-#define R600_RR_INST_5         6
-#define R600_RR_INST_6         7
-#define R600_RR_INST_7         8
-#define R600_RR_CMDSIZE                9
-
-#define R600_FP_CMD_0          0
-#define R600_FP_CNTL0          1
-#define R600_FP_CNTL1          2
-#define R600_FP_CNTL2          3
-#define R600_FP_CMD_1          4
-#define R600_FP_NODE0          5
-#define R600_FP_NODE1          6
-#define R600_FP_NODE2          7
-#define R600_FP_NODE3          8
-#define R600_FP_CMDSIZE                9
-
-#define R500_FP_CMD_0           0
-#define R500_FP_CNTL            1
-#define R500_FP_PIXSIZE         2
-#define R500_FP_CMD_1           3
-#define R500_FP_CODE_ADDR       4
-#define R500_FP_CODE_RANGE      5
-#define R500_FP_CODE_OFFSET     6
-#define R500_FP_CMD_2           7
-#define R500_FP_FC_CNTL         8
-#define R500_FP_CMDSIZE         9
-
-#define R600_FPT_CMD_0         0
-#define R600_FPT_INSTR_0       1
-#define R600_FPT_CMDSIZE       65
-
-#define R600_FPI_CMD_0         0
-#define R600_FPI_INSTR_0       1
-#define R600_FPI_CMDSIZE       65
-/* R500 has space for 512 instructions - 6 dwords per instruction */
-#define R500_FPI_CMDSIZE       (512*6+1)
-
-#define R600_FPP_CMD_0         0
-#define R600_FPP_PARAM_0       1
-#define R600_FPP_CMDSIZE       (32*4+1)
-/* R500 has spcae for 256 constants - 4 dwords per constant */
-#define R500_FPP_CMDSIZE       (256*4+1)
-
-#define R600_FOGS_CMD_0                0
-#define R600_FOGS_STATE                1
-#define R600_FOGS_CMDSIZE      2
-
-#define R600_FOGC_CMD_0                0
-#define R600_FOGC_R            1
-#define R600_FOGC_G            2
-#define R600_FOGC_B            3
-#define R600_FOGC_CMDSIZE      4
-
-#define R600_FOGP_CMD_0                0
-#define R600_FOGP_SCALE                1
-#define R600_FOGP_START                2
-#define R600_FOGP_CMDSIZE      3
-
-#define R600_AT_CMD_0          0
-#define R600_AT_ALPHA_TEST     1
-#define R600_AT_UNKNOWN                2
-#define R600_AT_CMDSIZE                3
-
-#define R600_BLD_CMD_0         0
-#define R600_BLD_CBLEND                1
-#define R600_BLD_ABLEND                2
-#define R600_BLD_CMDSIZE       3
-
-#define R600_CMK_CMD_0         0
-#define R600_CMK_COLORMASK     1
-#define R600_CMK_CMDSIZE       2
-
-#define R600_CB_CMD_0          0
-#define R600_CB_OFFSET         1
-#define R600_CB_CMD_1          2
-#define R600_CB_PITCH          3
-#define R600_CB_CMDSIZE                4
-
-#define R600_ZS_CMD_0          0
-#define R600_ZS_CNTL_0         1
-#define R600_ZS_CNTL_1         2
-#define R600_ZS_CNTL_2         3
-#define R600_ZS_CMDSIZE                4
-
-#define R600_ZB_CMD_0          0
-#define R600_ZB_OFFSET         1
-#define R600_ZB_PITCH          2
-#define R600_ZB_CMDSIZE                3
-
-#define R600_VAP_CNTL_FLUSH     0
-#define R600_VAP_CNTL_FLUSH_1   1
-#define R600_VAP_CNTL_CMD       2
-#define R600_VAP_CNTL_INSTR     3
-#define R600_VAP_CNTL_SIZE      4
-
-#define R600_VPI_CMD_0         0
-#define R600_VPI_INSTR_0       1
-#define R600_VPI_CMDSIZE       1025    /* 256 16 byte instructions */
-
-#define R600_VPP_CMD_0         0
-#define R600_VPP_PARAM_0       1
-#define R600_VPP_CMDSIZE       1025    /* 256 4-component parameters */
-
-#define R600_VPUCP_CMD_0               0
-#define R600_VPUCP_X            1
-#define R600_VPUCP_Y            2
-#define R600_VPUCP_Z            3
-#define R600_VPUCP_W            4
-#define R600_VPUCP_CMDSIZE     5       /* 256 4-component parameters */
-
-#define R600_VPS_CMD_0         0
-#define R600_VPS_ZERO_0                1
-#define R600_VPS_ZERO_1                2
-#define R600_VPS_POINTSIZE     3
-#define R600_VPS_ZERO_3                4
-#define R600_VPS_CMDSIZE       5
-
-       /* the layout is common for all fields inside tex */
-#define R600_TEX_CMD_0         0
-#define R600_TEX_VALUE_0       1
-/* We don't really use this, instead specify mtu+1 dynamically
-#define R600_TEX_CMDSIZE       (MAX_TEXTURE_UNITS+1)
-*/
-
-/**
- * Cache for hardware register state.
- */
-struct r600_hw_state {
-       struct radeon_state_atom vpt;   /* viewport (1D98) */
-       struct radeon_state_atom vap_cntl;
-        struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
-       struct radeon_state_atom vof;   /* VAP output format register 0x2090 */
-       struct radeon_state_atom vte;   /* (20B0) */
-       struct radeon_state_atom vap_vf_max_vtx_indx;   /* Maximum Vertex Indx Clamp (2134) */
-       struct radeon_state_atom vap_cntl_status;
-       struct radeon_state_atom vir[2];        /* vap input route (2150/21E0) */
-       struct radeon_state_atom vic;   /* vap input control (2180) */
-       struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
-       struct radeon_state_atom vap_clip_cntl;
-       struct radeon_state_atom vap_clip;
-       struct radeon_state_atom vap_pvs_vtx_timeout_reg;       /* Vertex timeout register (2288) */
-       struct radeon_state_atom pvs;   /* pvs_cntl (22D0) */
-       struct radeon_state_atom gb_enable;     /* (4008) */
-       struct radeon_state_atom gb_misc;       /* Multisampling position shifts ? (4010) */
-       struct radeon_state_atom ga_point_s0;   /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
-       struct radeon_state_atom ga_triangle_stipple;   /* (4214) */
-       struct radeon_state_atom ps;    /* pointsize (421C) */
-       struct radeon_state_atom ga_point_minmax;       /* (4230) */
-       struct radeon_state_atom lcntl; /* line control */
-       struct radeon_state_atom ga_line_stipple;       /* (4260) */
-       struct radeon_state_atom shade;
-       struct radeon_state_atom polygon_mode;
-       struct radeon_state_atom fogp;  /* fog parameters (4294) */
-       struct radeon_state_atom ga_soft_reset; /* (429C) */
-       struct radeon_state_atom zbias_cntl;
-       struct radeon_state_atom zbs;   /* zbias (42A4) */
-       struct radeon_state_atom occlusion_cntl;
-       struct radeon_state_atom cul;   /* cull cntl (42B8) */
-       struct radeon_state_atom su_depth_scale;        /* (42C0) */
-       struct radeon_state_atom rc;    /* rs control (4300) */
-       struct radeon_state_atom ri;    /* rs interpolators (4310) */
-       struct radeon_state_atom rr;    /* rs route (4330) */
-       struct radeon_state_atom sc_hyperz;     /* (43A4) */
-       struct radeon_state_atom sc_screendoor; /* (43E8) */
-       struct radeon_state_atom fp;    /* fragment program cntl + nodes (4600) */
-       struct radeon_state_atom fpt;   /* texi - (4620) */
-       struct radeon_state_atom us_out_fmt;    /* (46A4) */
-       struct radeon_state_atom r500fp;        /* r500 fp instructions */
-       struct radeon_state_atom r500fp_const;  /* r500 fp constants */
-       struct radeon_state_atom fpi[4];        /* fp instructions (46C0/47C0/48C0/49C0) */
-       struct radeon_state_atom fogs;  /* fog state (4BC0) */
-       struct radeon_state_atom fogc;  /* fog color (4BC8) */
-       struct radeon_state_atom at;    /* alpha test (4BD4) */
-       struct radeon_state_atom fg_depth_src;  /* (4BD8) */
-       struct radeon_state_atom fpp;   /* 0x4C00 and following */
-       struct radeon_state_atom rb3d_cctl;     /* (4E00) */
-       struct radeon_state_atom bld;   /* blending (4E04) */
-       struct radeon_state_atom cmk;   /* colormask (4E0C) */
-       struct radeon_state_atom blend_color;   /* constant blend color */
-       struct radeon_state_atom rop;   /* ropcntl */
-       struct radeon_state_atom cb;    /* colorbuffer (4E28) */
-       struct radeon_state_atom rb3d_dither_ctl;       /* (4E50) */
-       struct radeon_state_atom rb3d_aaresolve_ctl;    /* (4E88) */
-       struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold;  /* (4E88) I saw it only written on RV350 hardware..  */
-       struct radeon_state_atom zs;    /* zstencil control (4F00) */
-       struct radeon_state_atom zstencil_format;
-       struct radeon_state_atom zb;    /* z buffer (4F20) */
-       struct radeon_state_atom zb_depthclearvalue;    /* (4F28) */
-       struct radeon_state_atom unk4F30;       /* (4F30) */
-       struct radeon_state_atom zb_hiz_offset; /* (4F44) */
-       struct radeon_state_atom zb_hiz_pitch;  /* (4F54) */
-
-       struct radeon_state_atom vpi;   /* vp instructions */
-       struct radeon_state_atom vpp;   /* vp parameters */
-       struct radeon_state_atom vps;   /* vertex point size (?) */
-       struct radeon_state_atom vpucp[6];      /* vp user clip plane - 6 */
-       /* 8 texture units */
-       /* the state is grouped by function and not by
-          texture unit. This makes single unit updates
-          really awkward - we are much better off
-          updating the whole thing at once */
-       struct {
-               struct radeon_state_atom filter;
-               struct radeon_state_atom filter_1;
-               struct radeon_state_atom size;
-               struct radeon_state_atom format;
-               struct radeon_state_atom pitch;
-               struct radeon_state_atom offset;
-               struct radeon_state_atom chroma_key;
-               struct radeon_state_atom border_color;
-       } tex;
-       struct radeon_state_atom txe;   /* tex enable (4104) */
-
-       radeonTexObj *textures[R600_MAX_TEXTURE_UNITS];
-};
-
-/**
- * State cache
- */
-
-/* Vertex shader state */
-
 /* Perhaps more if we store programs in vmem? */
 /* drm_r600_cmd_header_t->vpu->count is unsigned char */
 #define VSF_MAX_FRAGMENT_LENGTH (255*4)
@@ -407,19 +93,6 @@ struct r600_hw_state {
 #define STATE_R600_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
 #define STATE_R600_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
 
-struct r600_vertex_shader_fragment {
-       int length;
-       union {
-               GLuint d[VSF_MAX_FRAGMENT_LENGTH];
-               float f[VSF_MAX_FRAGMENT_LENGTH];
-               GLuint i[VSF_MAX_FRAGMENT_LENGTH];
-       } body;
-};
-
-struct r600_vertex_shader_state {
-       struct r600_vertex_shader_fragment program;
-};
-
 extern int hw_tcl_on;
 
 #define COLOR_IS_RGBA
@@ -427,219 +100,18 @@ extern int hw_tcl_on;
 #include "tnl_dd/t_dd_vertex.h"
 #undef TAG
 
-//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current)
-#define CURRENT_VERTEX_SHADER(ctx) (R600_CONTEXT(ctx)->selected_vp)
-
-/* Should but doesnt work */
-//#define CURRENT_VERTEX_SHADER(ctx) (R600_CONTEXT(ctx)->curr_vp)
-
-/* r600_vertex_shader_state and r600_vertex_program should probably be merged together someday.
- * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly.
- */
-
-struct r600_vertex_program_key {
-       GLuint InputsRead;
-       GLuint OutputsWritten;
-       GLuint OutputsAdded;
-};
-
-struct r600_vertex_program {
-       struct r600_vertex_program *next;
-       struct r600_vertex_program_key key;
-       int translated;
-
-       struct r600_vertex_shader_fragment program;
-
-       int pos_end;
-       int num_temporaries;    /* Number of temp vars used by program */
-       int wpos_idx;
-       int inputs[VERT_ATTRIB_MAX];
-       int outputs[VERT_RESULT_MAX];
-       int native;
-       int ref_count;
-       int use_ref_count;
-};
-
-struct r600_vertex_program_cont {
-       struct gl_vertex_program mesa_program;  /* Must be first */
-       struct r600_vertex_shader_fragment params;
-       struct r600_vertex_program *progs;
-};
-
 #define PFS_MAX_ALU_INST       64
 #define PFS_MAX_TEX_INST       64
 #define PFS_MAX_TEX_INDIRECT 4
 #define PFS_NUM_TEMP_REGS      32
 #define PFS_NUM_CONST_REGS     16
 
-struct r600_pfs_compile_state;
-
-
-/**
- * Stores state that influences the compilation of a fragment program.
- */
-struct r600_fragment_program_external_state {
-       struct {
-               /**
-                * If the sampler is used as a shadow sampler,
-                * this field is:
-                *  0 - GL_LUMINANCE
-                *  1 - GL_INTENSITY
-                *  2 - GL_ALPHA
-                * depending on the depth texture mode.
-                */
-               GLuint depth_texture_mode : 2;
-
-               /**
-                * If the sampler is used as a shadow sampler,
-                * this field is (texture_compare_func - GL_NEVER).
-                * [e.g. if compare function is GL_LEQUAL, this field is 3]
-                *
-                * Otherwise, this field is 0.
-                */
-               GLuint texture_compare_func : 3;
-       } unit[16];
-};
-
-
-struct r600_fragment_program_node {
-       int tex_offset; /**< first tex instruction */
-       int tex_end; /**< last tex instruction, relative to tex_offset */
-       int alu_offset; /**< first ALU instruction */
-       int alu_end; /**< last ALU instruction, relative to alu_offset */
-       int flags;
-};
-
-/**
- * Stores an R600 fragment program in its compiled-to-hardware form.
- */
-struct r600_fragment_program_code {
-       struct {
-               int length; /**< total # of texture instructions used */
-               GLuint inst[PFS_MAX_TEX_INST];
-       } tex;
-
-       struct {
-               int length; /**< total # of ALU instructions used */
-               struct {
-                       GLuint inst0;
-                       GLuint inst1;
-                       GLuint inst2;
-                       GLuint inst3;
-               } inst[PFS_MAX_ALU_INST];
-       } alu;
-
-       struct r600_fragment_program_node node[4];
-       int cur_node;
-       int first_node_has_tex;
-
-       /**
-        * Remember which program register a given hardware constant
-        * belongs to.
-        */
-       struct prog_src_register constant[PFS_NUM_CONST_REGS];
-       int const_nr;
-
-       int max_temp_idx;
-};
-
-/**
- * Store everything about a fragment program that is needed
- * to render with that program.
- */
-struct r600_fragment_program {
-       struct gl_fragment_program mesa_program;
-
-       GLboolean translated;
-       GLboolean error;
-
-       struct r600_fragment_program_external_state state;
-       struct r600_fragment_program_code code;
-
-       GLboolean WritesDepth;
-       GLuint optimization;
-};
-
-struct r500_pfs_compile_state;
-
-struct r500_fragment_program_external_state {
-       struct {
-               /**
-                * If the sampler is used as a shadow sampler,
-                * this field is:
-                *  0 - GL_LUMINANCE
-                *  1 - GL_INTENSITY
-                *  2 - GL_ALPHA
-                * depending on the depth texture mode.
-                */
-               GLuint depth_texture_mode : 2;
-
-               /**
-                * If the sampler is used as a shadow sampler,
-                * this field is (texture_compare_func - GL_NEVER).
-                * [e.g. if compare function is GL_LEQUAL, this field is 3]
-                *
-                * Otherwise, this field is 0.
-                */
-               GLuint texture_compare_func : 3;
-       } unit[16];
-};
-
-struct r500_fragment_program_code {
-       struct {
-               GLuint inst0;
-               GLuint inst1;
-               GLuint inst2;
-               GLuint inst3;
-               GLuint inst4;
-               GLuint inst5;
-       } inst[512];
-
-       int inst_offset;
-       int inst_end;
-
-       /**
-        * Remember which program register a given hardware constant
-        * belongs to.
-        */
-       struct prog_src_register constant[PFS_NUM_CONST_REGS];
-       int const_nr;
-
-       int max_temp_idx;
-};
-
-struct r500_fragment_program {
-       struct gl_fragment_program mesa_program;
-
-       GLcontext *ctx;
-       GLboolean translated;
-       GLboolean error;
-
-       struct r500_fragment_program_external_state state;
-       struct r500_fragment_program_code code;
-
-       GLboolean writes_depth;
-
-       GLuint optimization;
-};
-
 #define R600_MAX_AOS_ARRAYS            16
 
 #define REG_COORDS     0
 #define REG_COLOR0     1
 #define REG_TEX0       2
 
-struct r600_state {
-       struct r600_texture_state texture;
-       int sw_tcl_inputs[VERT_ATTRIB_MAX];
-       struct r600_vertex_shader_state vertex_shader;
-
-
-       DECLARE_RENDERINPUTS(render_inputs_bitset);     /* actual render inputs that R600 was configured for.
-                                                          They are the same as tnl->render_inputs for fixed pipeline */
-
-};
-
 #define R600_FALLBACK_NONE 0
 #define R600_FALLBACK_TCL 1
 #define R600_FALLBACK_RAST 2
@@ -668,6 +140,71 @@ struct r600_swtcl_info {
    GLubyte vertex_attr_count;
 };
 
+/* to be enabled */ /* DELETE FOLLOWING 2 STRUCTS */
+struct r600_dma_buffer 
+{
+    int         refcount;       /* the number of retained regions in buf */
+    drmBufPtr   buf;            /* DRM buffer handle backing this DMA buffer */
+    int         id;             /* NOTE(review): meaning not visible in this file - confirm */
+};
+struct r600_dma_region 
+{
+    struct r600_dma_buffer  *buf;   /* DMA buffer this region refers to */
+    char        *address;       /* == buf->address (NOTE(review): r600_dma_buffer has no address member; presumably buf->buf->address) */
+    int         start;
+    int         end;
+    int         ptr;            /* offsets from start of buf */
+
+    int         aos_offset;     /* address in GART memory */
+    int         aos_stride;     /* distance between elements, in dwords */
+    int         aos_size;       /* number of components (1-4) */
+};
+/* ----------------------- */
+
+typedef struct chip_object
+{
+    void      *pvChipObj;
+
+    /* ---- OUT: hooks the context code calls; presumably installed by r700InitChipObject() (TODO confirm) ---- */
+    GLboolean (*DestroyChipObj)(void* pvChipObj);
+
+    void      (*InitFuncs)(struct dd_function_table *functions);
+
+    void      (*InitState)(GLcontext * ctx);
+
+    GLuint    (*GetTexObjSize)(void);
+
+    /* ---- IN: hooks assigned by r600CreateContext() right after r700InitChipObject() ---- */
+    void      (*EmitShader)( GLcontext * ctx, 
+                             struct r600_dma_region *rvb,
+                                        GLvoid * data, 
+                             int sizeinDWORD);
+    void      (*FreeDmaRegion)( GLcontext * ctx, 
+                                struct r600_dma_region *region);
+    void      (*EmitVec)(GLcontext * ctx, 
+                         struct r600_dma_region *rvb,
+                                    GLvoid * data, 
+                         int size, 
+                         int stride, 
+                         int count);
+    void      (*MemUse)(context_t *context, int id);
+    void      (*ReleaseArrays)(GLcontext * ctx);
+    int       (*FlushCmdBuffer)(GLcontext * ctx);
+    GLboolean (*LoadMemSurf)(context_t *context,
+                               GLuint     dst_offset, /* gpu addr */
+                               GLuint     dst_pitch_in_pixel,                               
+                               GLuint     src_width_in_pixel,
+                               GLuint     height,
+                               GLuint     byte_per_pixel,
+                               unsigned char* pSrc); /* source data */    
+    GLboolean (*AllocMemSurf)(context_t   *context,
+                           void  **ppmemBlock,
+                           void  **ppheap,
+                           GLuint      *prefered_heap, 
+                           GLuint       totalSize);
+    
+    struct tnl_pipeline_stage **stages; /* pipeline passed to _tnl_install_pipeline() in r600CreateContext() */
+} chip_object;
 
 /**
  * \brief R600 context structure.
@@ -675,11 +212,8 @@ struct r600_swtcl_info {
 struct r600_context {
        struct radeon_context radeon;   /* parent class, must be first */
 
-       struct r600_hw_state hw;
-
-       struct r600_state state;
-       struct gl_vertex_program *curr_vp;
-       struct r600_vertex_program *selected_vp;
+    /* ------ */
+    chip_object chipobj;
 
        /* Vertex buffers
         */
@@ -693,18 +227,14 @@ struct r600_context {
 };
 
 #define R600_CONTEXT(ctx)              ((r600ContextPtr)(ctx->DriverCtx))
+#define R700_CONTEXT(ctx)              ((context_t *)(ctx->DriverCtx))
+#define GL_CONTEXT(context)     ((GLcontext *)(context->radeon.glCtx))
 
 extern void r600DestroyContext(__DRIcontextPrivate * driContextPriv);
 extern GLboolean r600CreateContext(const __GLcontextModes * glVisual,
                                   __DRIcontextPrivate * driContextPriv,
                                   void *sharedContextPrivate);
 
-extern void r600SelectVertexShader(r600ContextPtr r600);
-extern void r600InitShaderFuncs(struct dd_function_table *functions);
-extern int r600VertexProgUpdateParams(GLcontext * ctx,
-                                     struct r600_vertex_program_cont *vp,
-                                     float *dst);
-
 #define RADEON_D_CAPTURE 0
 #define RADEON_D_PLAYBACK 1
 #define RADEON_D_PLAYBACK_RAW 2
index 7004f7a2b7cc593d472a9da7e7e38926c3415f78..9d43873c4d859f8f96e5aae625c295aa60014895 100644 (file)
@@ -46,156 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "tnl/t_context.h"
 
 #include "r600_context.h"
-#include "r600_state.h"
 #include "r600_emit.h"
-#include "r600_ioctl.h"
-
-
-#if SWIZZLE_X != R600_INPUT_ROUTE_SELECT_X || \
-    SWIZZLE_Y != R600_INPUT_ROUTE_SELECT_Y || \
-    SWIZZLE_Z != R600_INPUT_ROUTE_SELECT_Z || \
-    SWIZZLE_W != R600_INPUT_ROUTE_SELECT_W || \
-    SWIZZLE_ZERO != R600_INPUT_ROUTE_SELECT_ZERO || \
-    SWIZZLE_ONE != R600_INPUT_ROUTE_SELECT_ONE
-#error Cannot change these!
-#endif
-
-#define DEBUG_ALL DEBUG_VERTS
-
-#define DW_SIZE(x) ((inputs[tab[(x)]] << R600_DST_VEC_LOC_SHIFT) |     \
-                   (attribptr[tab[(x)]]->size - 1) << R600_DATA_TYPE_0_SHIFT)
-
-GLuint r600VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
-                                int *inputs, GLint * tab, GLuint nr)
-{
-       GLuint i, dw;
-
-       /* type, inputs, stop bit, size */
-       for (i = 0; i < nr; i += 2) {
-               /* make sure input is valid, would lockup the gpu */
-               assert(inputs[tab[i]] != -1);
-               dw = (R600_SIGNED | DW_SIZE(i));
-               if (i + 1 == nr) {
-                       dw |= R600_LAST_VEC << R600_DATA_TYPE_0_SHIFT;
-               } else {
-                       assert(inputs[tab[i + 1]] != -1);
-                       dw |= (R600_SIGNED |
-                              DW_SIZE(i + 1)) << R600_DATA_TYPE_1_SHIFT;
-                       if (i + 2 == nr) {
-                               dw |= R600_LAST_VEC << R600_DATA_TYPE_1_SHIFT;
-                       }
-               }
-               dst[i >> 1] = dw;
-       }
-
-       return (nr + 1) >> 1;
-}
-
-static GLuint r600VAPInputRoute1Swizzle(int swizzle[4])
-{
-       return (swizzle[0] << R600_SWIZZLE_SELECT_X_SHIFT) |
-           (swizzle[1] << R600_SWIZZLE_SELECT_Y_SHIFT) |
-           (swizzle[2] << R600_SWIZZLE_SELECT_Z_SHIFT) |
-           (swizzle[3] << R600_SWIZZLE_SELECT_W_SHIFT);
-}
-
-GLuint r600VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
-{
-       GLuint i, dw;
-
-       for (i = 0; i < nr; i += 2) {
-               dw = (r600VAPInputRoute1Swizzle(swizzle[i]) |
-                     ((R600_WRITE_ENA_X | R600_WRITE_ENA_Y |
-                       R600_WRITE_ENA_Z | R600_WRITE_ENA_W) << R600_WRITE_ENA_SHIFT)) << R600_SWIZZLE0_SHIFT;
-               if (i + 1 < nr) {
-                       dw |= (r600VAPInputRoute1Swizzle(swizzle[i + 1]) |
-                              ((R600_WRITE_ENA_X | R600_WRITE_ENA_Y |
-                                R600_WRITE_ENA_Z | R600_WRITE_ENA_W) << R600_WRITE_ENA_SHIFT)) << R600_SWIZZLE1_SHIFT;
-               }
-               dst[i >> 1] = dw;
-       }
-
-       return (nr + 1) >> 1;
-}
-
-GLuint r600VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
-{
-       /* No idea what this value means. I have seen other values written to
-        * this register... */
-       return 0x5555;
-}
-
-GLuint r600VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
-{
-       r600ContextPtr rmesa = R600_CONTEXT(ctx);
-       GLuint i, vic_1 = 0;
-
-       if (InputsRead & (1 << VERT_ATTRIB_POS))
-               vic_1 |= R600_INPUT_CNTL_POS;
-
-       if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
-               vic_1 |= R600_INPUT_CNTL_NORMAL;
-
-       if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
-               vic_1 |= R600_INPUT_CNTL_COLOR;
-
-       rmesa->state.texture.tc_count = 0;
-       for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-               if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
-                       rmesa->state.texture.tc_count++;
-                       vic_1 |= R600_INPUT_CNTL_TC0 << i;
-               }
-
-       return vic_1;
-}
-
-GLuint r600VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
-{
-       GLuint ret = 0;
-
-       if (OutputsWritten & (1 << VERT_RESULT_HPOS))
-               ret |= R600_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_COL0))
-               ret |= R600_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_COL1))
-               ret |= R600_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_BFC0)
-           || OutputsWritten & (1 << VERT_RESULT_BFC1))
-               ret |=
-                   R600_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
-                   R600_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
-                   R600_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
-
-       if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
-               ret |= R600_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-
-       return ret;
-}
-
-GLuint r600VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
-{
-       GLuint i, ret = 0, first_free_texcoord = 0;
-
-       for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-               if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
-                       ret |= (4 << (3 * i));
-                       ++first_free_texcoord;
-               }
-       }
-
-       if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
-               if (first_free_texcoord > 8) {
-                       fprintf(stderr, "\tout of free texcoords to write fog coord\n");
-                       _mesa_exit(-1);
-               }
-               ret |= 4 << (3 * first_free_texcoord);
-       }
-
-       return ret;
-}
 
 /* Emit vertex data to GART memory
  * Route inputs to the vertex processor
@@ -203,155 +54,14 @@ GLuint r600VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
  */
 int r600EmitArrays(GLcontext * ctx)
 {
-       r600ContextPtr rmesa = R600_CONTEXT(ctx);
-       TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct vertex_buffer *vb = &tnl->vb;
-       GLuint nr;
-       GLuint count = vb->Count;
-       GLuint i;
-       GLuint InputsRead = 0, OutputsWritten = 0;
-       int *inputs = NULL;
-       int vir_inputs[VERT_ATTRIB_MAX];
-       GLint tab[VERT_ATTRIB_MAX];
-       int swizzle[VERT_ATTRIB_MAX][4];
-       struct r600_vertex_program *prog =
-           (struct r600_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
-
-       if (hw_tcl_on) {
-               inputs = prog->inputs;
-               InputsRead = prog->key.InputsRead;
-               OutputsWritten = prog->key.OutputsWritten;
-       } else {
-               inputs = rmesa->state.sw_tcl_inputs;
-
-               DECLARE_RENDERINPUTS(render_inputs_bitset);
-               RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
-
-               vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
-
-               assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
-               assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
-
-               if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
-                       InputsRead |= 1 << VERT_ATTRIB_POS;
-                       OutputsWritten |= 1 << VERT_RESULT_HPOS;
-               }
-
-               if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
-                       InputsRead |= 1 << VERT_ATTRIB_COLOR0;
-                       OutputsWritten |= 1 << VERT_RESULT_COL0;
-               }
-
-               if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
-                       InputsRead |= 1 << VERT_ATTRIB_COLOR1;
-                       OutputsWritten |= 1 << VERT_RESULT_COL1;
-               }
-
-               for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-                       if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
-                               InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
-                               OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
-                       }
-               }
-
-               for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
-                       if (InputsRead & (1 << i)) {
-                               inputs[i] = nr++;
-                       } else {
-                               inputs[i] = -1;
-                       }
-               }
-
-               /* Fixed, apply to vir0 only */
-               memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
-               inputs = vir_inputs;
-               if (InputsRead & VERT_ATTRIB_POS)
-                       inputs[VERT_ATTRIB_POS] = 0;
-               if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
-                       inputs[VERT_ATTRIB_COLOR0] = 2;
-               if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
-                       inputs[VERT_ATTRIB_COLOR1] = 3;
-               for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
-                       if (InputsRead & (1 << i))
-                               inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
-
-               RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
-       }
-
-       assert(InputsRead);
-       assert(OutputsWritten);
-
-       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
-               if (InputsRead & (1 << i)) {
-                       tab[nr++] = i;
-               }
-       }
-
-       if (nr > R600_MAX_AOS_ARRAYS) {
-               return R600_FALLBACK_TCL;
-       }
-
-       for (i = 0; i < nr; i++) {
-               int ci;
-
-               swizzle[i][0] = SWIZZLE_ZERO;
-               swizzle[i][1] = SWIZZLE_ZERO;
-               swizzle[i][2] = SWIZZLE_ZERO;
-               swizzle[i][3] = SWIZZLE_ONE;
-
-               for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
-                       swizzle[i][ci] = ci;
-               }
-               rcommon_emit_vector(ctx, &rmesa->radeon.tcl.aos[i],
-                                   vb->AttribPtr[tab[i]]->data,
-                                   vb->AttribPtr[tab[i]]->size,
-                                   vb->AttribPtr[tab[i]]->stride, count);
-       }
-
-       /* Setup INPUT_ROUTE. */
-       if (rmesa->radeon.radeonScreen->kernel_mm) {
-               R600_STATECHANGE(rmesa, vir[0]);
-               rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
-               rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
-               rmesa->hw.vir[0].cmd[0] |=
-                       (r600VAPInputRoute0(&rmesa->hw.vir[0].cmd[R600_VIR_CNTL_0],
-                                           vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16;
-               R600_STATECHANGE(rmesa, vir[1]);
-               rmesa->hw.vir[1].cmd[0] |=
-                       (r600VAPInputRoute1(&rmesa->hw.vir[1].cmd[R600_VIR_CNTL_0], swizzle,
-                                           nr) & 0x3FFF) << 16;
-       } else {
-               R600_STATECHANGE(rmesa, vir[0]);
-               ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
-                       r600VAPInputRoute0(&rmesa->hw.vir[0].cmd[R600_VIR_CNTL_0],
-                                          vb->AttribPtr, inputs, tab, nr);
-               R600_STATECHANGE(rmesa, vir[1]);
-               ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
-                       r600VAPInputRoute1(&rmesa->hw.vir[1].cmd[R600_VIR_CNTL_0], swizzle,
-                                          nr);
-       }
        
-       /* Setup INPUT_CNTL. */
-       R600_STATECHANGE(rmesa, vic);
-       rmesa->hw.vic.cmd[R600_VIC_CNTL_0] = r600VAPInputCntl0(ctx, InputsRead);
-       rmesa->hw.vic.cmd[R600_VIC_CNTL_1] = r600VAPInputCntl1(ctx, InputsRead);
-
-       /* Setup OUTPUT_VTX_FMT. */
-       R600_STATECHANGE(rmesa, vof);
-       rmesa->hw.vof.cmd[R600_VOF_CNTL_0] =
-           r600VAPOutputCntl0(ctx, OutputsWritten);
-       rmesa->hw.vof.cmd[R600_VOF_CNTL_1] =
-           r600VAPOutputCntl1(ctx, OutputsWritten);
-
-       rmesa->radeon.tcl.aos_count = nr;
-
        return R600_FALLBACK_NONE;
 }
 
 void r600EmitCacheFlush(r600ContextPtr rmesa)
 {
        BATCH_LOCALS(&rmesa->radeon);
-
+/*
        BEGIN_BATCH_NO_AUTOSTATE(4);
        OUT_BATCH_REGVAL(R600_RB3D_DSTCACHE_CTLSTAT,
                R600_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
@@ -361,4 +71,5 @@ void r600EmitCacheFlush(r600ContextPtr rmesa)
                R600_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
        END_BATCH();
        COMMIT_BATCH();
+*/
 }
diff --git a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h
new file mode 100644 (file)
index 0000000..9d5aa3c
--- /dev/null
@@ -0,0 +1,3087 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009  Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _AUTOREGS
+#define _AUTOREGS
+
+enum {
+
+    VGT_VTX_VECT_EJECT_REG                                = 0x000088b0,
+       PRIM_COUNT_mask                                   = 0x3ff << 0,
+       PRIM_COUNT_shift                                  = 0,
+    VGT_LAST_COPY_STATE                                   = 0x000088c0,
+       SRC_STATE_ID_mask                                 = 0x07 << 0,
+       SRC_STATE_ID_shift                                = 0,
+       DST_STATE_ID_mask                                 = 0x07 << 16,
+       DST_STATE_ID_shift                                = 16,
+    VGT_CACHE_INVALIDATION                                = 0x000088c4,
+       CACHE_INVALIDATION_mask                           = 0x03 << 0,
+       CACHE_INVALIDATION_shift                          = 0,
+           VC_ONLY                                       = 0x00,
+           TC_ONLY                                       = 0x01,
+           VC_AND_TC                                     = 0x02,
+       VS_NO_EXTRA_BUFFER_bit                            = 1 << 5,
+    VGT_GS_PER_ES                                         = 0x000088c8,
+    VGT_ES_PER_GS                                         = 0x000088cc,
+    VGT_GS_VERTEX_REUSE                                   = 0x000088d4,
+       VERT_REUSE_mask                                   = 0x1f << 0,
+       VERT_REUSE_shift                                  = 0,
+    VGT_MC_LAT_CNTL                                       = 0x000088d8,
+       MC_TIME_STAMP_RES_mask                            = 0x03 << 0,
+       MC_TIME_STAMP_RES_shift                           = 0,
+           X_0_992_MAX_LATENCY                           = 0x00,
+           X_0_496_MAX_LATENCY                           = 0x01,
+           X_0_248_MAX_LATENCY                           = 0x02,
+           X_0_124_MAX_LATENCY                           = 0x03,
+    VGT_GS_PER_VS                                         = 0x000088e8,
+       GS_PER_VS_mask                                    = 0x0f << 0,
+       GS_PER_VS_shift                                   = 0,
+    VGT_CNTL_STATUS                                       = 0x000088f0,
+       VGT_OUT_INDX_BUSY_bit                             = 1 << 0,
+       VGT_OUT_BUSY_bit                                  = 1 << 1,
+       VGT_PT_BUSY_bit                                   = 1 << 2,
+       VGT_TE_BUSY_bit                                   = 1 << 3,
+       VGT_VR_BUSY_bit                                   = 1 << 4,
+       VGT_GRP_BUSY_bit                                  = 1 << 5,
+       VGT_DMA_REQ_BUSY_bit                              = 1 << 6,
+       VGT_DMA_BUSY_bit                                  = 1 << 7,
+       VGT_GS_BUSY_bit                                   = 1 << 8,
+       VGT_BUSY_bit                                      = 1 << 9,
+    VGT_PRIMITIVE_TYPE                                    = 0x00008958,
+       VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask                = 0x3f << 0,
+       VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift               = 0,
+           DI_PT_NONE                                    = 0x00,
+           DI_PT_POINTLIST                               = 0x01,
+           DI_PT_LINELIST                                = 0x02,
+           DI_PT_LINESTRIP                               = 0x03,
+           DI_PT_TRILIST                                 = 0x04,
+           DI_PT_TRIFAN                                  = 0x05,
+           DI_PT_TRISTRIP                                = 0x06,
+           DI_PT_UNUSED_0                                = 0x07,
+           DI_PT_UNUSED_1                                = 0x08,
+           DI_PT_UNUSED_2                                = 0x09,
+           DI_PT_LINELIST_ADJ                            = 0x0a,
+           DI_PT_LINESTRIP_ADJ                           = 0x0b,
+           DI_PT_TRILIST_ADJ                             = 0x0c,
+           DI_PT_TRISTRIP_ADJ                            = 0x0d,
+           DI_PT_UNUSED_3                                = 0x0e,
+           DI_PT_UNUSED_4                                = 0x0f,
+           DI_PT_TRI_WITH_WFLAGS                         = 0x10,
+           DI_PT_RECTLIST                                = 0x11,
+           DI_PT_LINELOOP                                = 0x12,
+           DI_PT_QUADLIST                                = 0x13,
+           DI_PT_QUADSTRIP                               = 0x14,
+           DI_PT_POLYGON                                 = 0x15,
+           DI_PT_2D_COPY_RECT_LIST_V0                    = 0x16,
+           DI_PT_2D_COPY_RECT_LIST_V1                    = 0x17,
+           DI_PT_2D_COPY_RECT_LIST_V2                    = 0x18,
+           DI_PT_2D_COPY_RECT_LIST_V3                    = 0x19,
+           DI_PT_2D_FILL_RECT_LIST                       = 0x1a,
+           DI_PT_2D_LINE_STRIP                           = 0x1b,
+           DI_PT_2D_TRI_STRIP                            = 0x1c,
+    VGT_INDEX_TYPE                                        = 0x0000895c,
+       INDEX_TYPE_mask                                   = 0x03 << 0,
+       INDEX_TYPE_shift                                  = 0,
+           DI_INDEX_SIZE_16_BIT                          = 0x00,
+           DI_INDEX_SIZE_32_BIT                          = 0x01,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_0                      = 0x00008960,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_1                      = 0x00008964,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_2                      = 0x00008968,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_3                      = 0x0000896c,
+    VGT_NUM_INDICES                                       = 0x00008970,
+    VGT_NUM_INSTANCES                                     = 0x00008974,
+    PA_CL_CNTL_STATUS                                     = 0x00008a10,
+       CL_BUSY_bit                                       = 1 << 31,
+    PA_CL_ENHANCE                                         = 0x00008a14,
+       CLIP_VTX_REORDER_ENA_bit                          = 1 << 0,
+       NUM_CLIP_SEQ_mask                                 = 0x03 << 1,
+       NUM_CLIP_SEQ_shift                                = 1,
+       CLIPPED_PRIM_SEQ_STALL_bit                        = 1 << 3,
+       VE_NAN_PROC_DISABLE_bit                           = 1 << 4,
+    PA_SU_CNTL_STATUS                                     = 0x00008a50,
+       SU_BUSY_bit                                       = 1 << 31,
+    PA_SC_LINE_STIPPLE_STATE                              = 0x00008b10,
+       CURRENT_PTR_mask                                  = 0x0f << 0,
+       CURRENT_PTR_shift                                 = 0,
+       CURRENT_COUNT_mask                                = 0xff << 8,
+       CURRENT_COUNT_shift                               = 8,
+    PA_SC_MULTI_CHIP_CNTL                                 = 0x00008b20,
+       LOG2_NUM_CHIPS_mask                               = 0x07 << 0,
+       LOG2_NUM_CHIPS_shift                              = 0,
+       MULTI_CHIP_TILE_SIZE_mask                         = 0x03 << 3,
+       MULTI_CHIP_TILE_SIZE_shift                        = 3,
+           X_16_X_16_PIXEL_TILE_PER_CHIP                 = 0x00,
+           X_32_X_32_PIXEL_TILE_PER_CHIP                 = 0x01,
+           X_64_X_64_PIXEL_TILE_PER_CHIP                 = 0x02,
+           X_128X128_PIXEL_TILE_PER_CHIP                 = 0x03,
+       CHIP_TILE_X_LOC_mask                              = 0x07 << 5,
+       CHIP_TILE_X_LOC_shift                             = 5,
+       CHIP_TILE_Y_LOC_mask                              = 0x07 << 8,
+       CHIP_TILE_Y_LOC_shift                             = 8,
+       CHIP_SUPER_TILE_B_bit                             = 1 << 11,
+    PA_SC_AA_SAMPLE_LOCS_2S                               = 0x00008b40,
+       S0_X_mask                                         = 0x0f << 0,
+       S0_X_shift                                        = 0,
+       S0_Y_mask                                         = 0x0f << 4,
+       S0_Y_shift                                        = 4,
+       S1_X_mask                                         = 0x0f << 8,
+       S1_X_shift                                        = 8,
+       S1_Y_mask                                         = 0x0f << 12,
+       S1_Y_shift                                        = 12,
+    PA_SC_AA_SAMPLE_LOCS_4S                               = 0x00008b44,
+/*     S0_X_mask                                         = 0x0f << 0, */
+/*     S0_X_shift                                        = 0, */
+/*     S0_Y_mask                                         = 0x0f << 4, */
+/*     S0_Y_shift                                        = 4, */
+/*     S1_X_mask                                         = 0x0f << 8, */
+/*     S1_X_shift                                        = 8, */
+/*     S1_Y_mask                                         = 0x0f << 12, */
+/*     S1_Y_shift                                        = 12, */
+       S2_X_mask                                         = 0x0f << 16,
+       S2_X_shift                                        = 16,
+       S2_Y_mask                                         = 0x0f << 20,
+       S2_Y_shift                                        = 20,
+       S3_X_mask                                         = 0x0f << 24,
+       S3_X_shift                                        = 24,
+       S3_Y_mask                                         = 0x0f << 28,
+       S3_Y_shift                                        = 28,
+    PA_SC_AA_SAMPLE_LOCS_8S_WD0                           = 0x00008b48,
+/*     S0_X_mask                                         = 0x0f << 0, */
+/*     S0_X_shift                                        = 0, */
+/*     S0_Y_mask                                         = 0x0f << 4, */
+/*     S0_Y_shift                                        = 4, */
+/*     S1_X_mask                                         = 0x0f << 8, */
+/*     S1_X_shift                                        = 8, */
+/*     S1_Y_mask                                         = 0x0f << 12, */
+/*     S1_Y_shift                                        = 12, */
+/*     S2_X_mask                                         = 0x0f << 16, */
+/*     S2_X_shift                                        = 16, */
+/*     S2_Y_mask                                         = 0x0f << 20, */
+/*     S2_Y_shift                                        = 20, */
+/*     S3_X_mask                                         = 0x0f << 24, */
+/*     S3_X_shift                                        = 24, */
+/*     S3_Y_mask                                         = 0x0f << 28, */
+/*     S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_8S_WD1                           = 0x00008b4c,
+       S4_X_mask                                         = 0x0f << 0,
+       S4_X_shift                                        = 0,
+       S4_Y_mask                                         = 0x0f << 4,
+       S4_Y_shift                                        = 4,
+       S5_X_mask                                         = 0x0f << 8,
+       S5_X_shift                                        = 8,
+       S5_Y_mask                                         = 0x0f << 12,
+       S5_Y_shift                                        = 12,
+       S6_X_mask                                         = 0x0f << 16,
+       S6_X_shift                                        = 16,
+       S6_Y_mask                                         = 0x0f << 20,
+       S6_Y_shift                                        = 20,
+       S7_X_mask                                         = 0x0f << 24,
+       S7_X_shift                                        = 24,
+       S7_Y_mask                                         = 0x0f << 28,
+       S7_Y_shift                                        = 28,
+    PA_SC_CNTL_STATUS                                     = 0x00008be0,
+       MPASS_OVERFLOW_bit                                = 1 << 30,
+    PA_SC_ENHANCE                                         = 0x00008bf0,
+       FORCE_EOV_MAX_CLK_CNT_mask                        = 0xfff << 0,
+       FORCE_EOV_MAX_CLK_CNT_shift                       = 0,
+       FORCE_EOV_MAX_TILE_CNT_mask                       = 0xfff << 12,
+       FORCE_EOV_MAX_TILE_CNT_shift                      = 12,
+    SQ_CONFIG                                             = 0x00008c00,
+       VC_ENABLE_bit                                     = 1 << 0,
+       EXPORT_SRC_C_bit                                  = 1 << 1,
+       DX9_CONSTS_bit                                    = 1 << 2,
+       ALU_INST_PREFER_VECTOR_bit                        = 1 << 3,
+       SQ_CONFIG__DX10_CLAMP_bit                         = 1 << 4,
+       ALU_PREFER_ONE_WATERFALL_bit                      = 1 << 5,
+       ALU_MAX_ONE_WATERFALL_bit                         = 1 << 6,
+       CLAUSE_SEQ_PRIO_mask                              = 0x03 << 8,
+       CLAUSE_SEQ_PRIO_shift                             = 8,
+           SQ_CL_PRIO_RND_ROBIN                          = 0x00,
+           SQ_CL_PRIO_MACRO_SEQ                          = 0x01,
+           SQ_CL_PRIO_NONE                               = 0x02,
+       PS_PRIO_mask                                      = 0x03 << 24,
+       PS_PRIO_shift                                     = 24,
+       VS_PRIO_mask                                      = 0x03 << 26,
+       VS_PRIO_shift                                     = 26,
+       GS_PRIO_mask                                      = 0x03 << 28,
+       GS_PRIO_shift                                     = 28,
+       ES_PRIO_mask                                      = 0x03 << 30,
+       ES_PRIO_shift                                     = 30,
+    SQ_GPR_RESOURCE_MGMT_1                                = 0x00008c04,
+       NUM_PS_GPRS_mask                                  = 0xff << 0,
+       NUM_PS_GPRS_shift                                 = 0,
+       NUM_VS_GPRS_mask                                  = 0xff << 16,
+       NUM_VS_GPRS_shift                                 = 16,
+       NUM_CLAUSE_TEMP_GPRS_mask                         = 0x0f << 28,
+       NUM_CLAUSE_TEMP_GPRS_shift                        = 28,
+    SQ_GPR_RESOURCE_MGMT_2                                = 0x00008c08,
+       NUM_GS_GPRS_mask                                  = 0xff << 0,
+       NUM_GS_GPRS_shift                                 = 0,
+       NUM_ES_GPRS_mask                                  = 0xff << 16,
+       NUM_ES_GPRS_shift                                 = 16,
+    SQ_THREAD_RESOURCE_MGMT                               = 0x00008c0c,
+       NUM_PS_THREADS_mask                               = 0xff << 0,
+       NUM_PS_THREADS_shift                              = 0,
+       NUM_VS_THREADS_mask                               = 0xff << 8,
+       NUM_VS_THREADS_shift                              = 8,
+       NUM_GS_THREADS_mask                               = 0xff << 16,
+       NUM_GS_THREADS_shift                              = 16,
+       NUM_ES_THREADS_mask                               = 0xff << 24,
+       NUM_ES_THREADS_shift                              = 24,
+    SQ_STACK_RESOURCE_MGMT_1                              = 0x00008c10,
+       NUM_PS_STACK_ENTRIES_mask                         = 0xfff << 0,
+       NUM_PS_STACK_ENTRIES_shift                        = 0,
+       NUM_VS_STACK_ENTRIES_mask                         = 0xfff << 16,
+       NUM_VS_STACK_ENTRIES_shift                        = 16,
+    SQ_STACK_RESOURCE_MGMT_2                              = 0x00008c14,
+       NUM_GS_STACK_ENTRIES_mask                         = 0xfff << 0,
+       NUM_GS_STACK_ENTRIES_shift                        = 0,
+       NUM_ES_STACK_ENTRIES_mask                         = 0xfff << 16,
+       NUM_ES_STACK_ENTRIES_shift                        = 16,
+    SQ_ESGS_RING_BASE                                     = 0x00008c40,
+    SQ_ESGS_RING_SIZE                                     = 0x00008c44,
+    SQ_GSVS_RING_BASE                                     = 0x00008c48,
+    SQ_GSVS_RING_SIZE                                     = 0x00008c4c,
+    SQ_ESTMP_RING_BASE                                    = 0x00008c50,
+    SQ_ESTMP_RING_SIZE                                    = 0x00008c54,
+    SQ_GSTMP_RING_BASE                                    = 0x00008c58,
+    SQ_GSTMP_RING_SIZE                                    = 0x00008c5c,
+    SQ_VSTMP_RING_BASE                                    = 0x00008c60,
+    SQ_VSTMP_RING_SIZE                                    = 0x00008c64,
+    SQ_PSTMP_RING_BASE                                    = 0x00008c68,
+    SQ_PSTMP_RING_SIZE                                    = 0x00008c6c,
+    SQ_FBUF_RING_BASE                                     = 0x00008c70,
+    SQ_FBUF_RING_SIZE                                     = 0x00008c74,
+    SQ_REDUC_RING_BASE                                    = 0x00008c78,
+    SQ_REDUC_RING_SIZE                                    = 0x00008c7c,
+    SQ_ALU_WORD1_OP3                                      = 0x00008dfc,
+       SRC2_SEL_mask                                     = 0x1ff << 0,
+       SRC2_SEL_shift                                    = 0,
+           SQ_ALU_SRC_0                                  = 0xf8,
+           SQ_ALU_SRC_1                                  = 0xf9,
+           SQ_ALU_SRC_1_INT                              = 0xfa,
+           SQ_ALU_SRC_M_1_INT                            = 0xfb,
+           SQ_ALU_SRC_0_5                                = 0xfc,
+           SQ_ALU_SRC_LITERAL                            = 0xfd,
+           SQ_ALU_SRC_PV                                 = 0xfe,
+           SQ_ALU_SRC_PS                                 = 0xff,
+       SRC2_REL_bit                                      = 1 << 9,
+       SRC2_CHAN_mask                                    = 0x03 << 10,
+       SRC2_CHAN_shift                                   = 10,
+           SQ_CHAN_X                                     = 0x00,
+           SQ_CHAN_Y                                     = 0x01,
+           SQ_CHAN_Z                                     = 0x02,
+           SQ_CHAN_W                                     = 0x03,
+       SRC2_NEG_bit                                      = 1 << 12,
+       SQ_ALU_WORD1_OP3__ALU_INST_mask                   = 0x1f << 13,
+       SQ_ALU_WORD1_OP3__ALU_INST_shift                  = 13,
+           SQ_OP3_INST_MUL_LIT                           = 0x0c,
+           SQ_OP3_INST_MUL_LIT_M2                        = 0x0d,
+           SQ_OP3_INST_MUL_LIT_M4                        = 0x0e,
+           SQ_OP3_INST_MUL_LIT_D2                        = 0x0f,
+           SQ_OP3_INST_MULADD                            = 0x10,
+           SQ_OP3_INST_MULADD_M2                         = 0x11,
+           SQ_OP3_INST_MULADD_M4                         = 0x12,
+           SQ_OP3_INST_MULADD_D2                         = 0x13,
+           SQ_OP3_INST_MULADD_IEEE                       = 0x14,
+           SQ_OP3_INST_MULADD_IEEE_M2                    = 0x15,
+           SQ_OP3_INST_MULADD_IEEE_M4                    = 0x16,
+           SQ_OP3_INST_MULADD_IEEE_D2                    = 0x17,
+           SQ_OP3_INST_CNDE                              = 0x18,
+           SQ_OP3_INST_CNDGT                             = 0x19,
+           SQ_OP3_INST_CNDGE                             = 0x1a,
+           SQ_OP3_INST_CNDE_INT                          = 0x1c,
+           SQ_OP3_INST_CNDGT_INT                         = 0x1d,
+           SQ_OP3_INST_CNDGE_INT                         = 0x1e,
+    SQ_TEX_WORD2                                          = 0x00008dfc,
+       OFFSET_X_mask                                     = 0x1f << 0,
+       OFFSET_X_shift                                    = 0,
+       OFFSET_Y_mask                                     = 0x1f << 5,
+       OFFSET_Y_shift                                    = 5,
+       OFFSET_Z_mask                                     = 0x1f << 10,
+       OFFSET_Z_shift                                    = 10,
+       SAMPLER_ID_mask                                   = 0x1f << 15,
+       SAMPLER_ID_shift                                  = 15,
+       SQ_TEX_WORD2__SRC_SEL_X_mask                      = 0x07 << 20,
+       SQ_TEX_WORD2__SRC_SEL_X_shift                     = 20,
+           SQ_SEL_X                                      = 0x00,
+           SQ_SEL_Y                                      = 0x01,
+           SQ_SEL_Z                                      = 0x02,
+           SQ_SEL_W                                      = 0x03,
+           SQ_SEL_0                                      = 0x04,
+           SQ_SEL_1                                      = 0x05,
+       SRC_SEL_Y_mask                                    = 0x07 << 23,
+       SRC_SEL_Y_shift                                   = 23,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+       SRC_SEL_Z_mask                                    = 0x07 << 26,
+       SRC_SEL_Z_shift                                   = 26,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+       SRC_SEL_W_mask                                    = 0x07 << 29,
+       SRC_SEL_W_shift                                   = 29,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+    SQ_CF_ALLOC_EXPORT_WORD1                              = 0x00008dfc,
+       BURST_COUNT_mask                                  = 0x0f << 17,
+       BURST_COUNT_shift                                 = 17,
+       END_OF_PROGRAM_bit                                = 1 << 21,
+       VALID_PIXEL_MODE_bit                              = 1 << 22,
+       SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask            = 0x7f << 23,
+       SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift           = 23,
+           SQ_CF_INST_MEM_STREAM0                        = 0x20,
+           SQ_CF_INST_MEM_STREAM1                        = 0x21,
+           SQ_CF_INST_MEM_STREAM2                        = 0x22,
+           SQ_CF_INST_MEM_STREAM3                        = 0x23,
+           SQ_CF_INST_MEM_SCRATCH                        = 0x24,
+           SQ_CF_INST_MEM_REDUCTION                      = 0x25,
+           SQ_CF_INST_MEM_RING                           = 0x26,
+           SQ_CF_INST_EXPORT                             = 0x27,
+           SQ_CF_INST_EXPORT_DONE                        = 0x28,
+       WHOLE_QUAD_MODE_bit                               = 1 << 30,
+       BARRIER_bit                                       = 1 << 31,
+    SQ_CF_ALU_WORD1                                       = 0x00008dfc,
+       KCACHE_MODE1_mask                                 = 0x03 << 0,
+       KCACHE_MODE1_shift                                = 0,
+           SQ_CF_KCACHE_NOP                              = 0x00,
+           SQ_CF_KCACHE_LOCK_1                           = 0x01,
+           SQ_CF_KCACHE_LOCK_2                           = 0x02,
+           SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03,
+       KCACHE_ADDR0_mask                                 = 0xff << 2,
+       KCACHE_ADDR0_shift                                = 2,
+       KCACHE_ADDR1_mask                                 = 0xff << 10,
+       KCACHE_ADDR1_shift                                = 10,
+       SQ_CF_ALU_WORD1__COUNT_mask                       = 0x7f << 18,
+       SQ_CF_ALU_WORD1__COUNT_shift                      = 18,
+       SQ_CF_ALU_WORD1__ALT_CONST_bit                    = 1 << 25,
+       SQ_CF_ALU_WORD1__CF_INST_mask                     = 0x0f << 26,
+       SQ_CF_ALU_WORD1__CF_INST_shift                    = 26,
+           SQ_CF_INST_ALU                                = 0x08,
+           SQ_CF_INST_ALU_PUSH_BEFORE                    = 0x09,
+           SQ_CF_INST_ALU_POP_AFTER                      = 0x0a,
+           SQ_CF_INST_ALU_POP2_AFTER                     = 0x0b,
+           SQ_CF_INST_ALU_CONTINUE                       = 0x0d,
+           SQ_CF_INST_ALU_BREAK                          = 0x0e,
+           SQ_CF_INST_ALU_ELSE_AFTER                     = 0x0f,
+/*     WHOLE_QUAD_MODE_bit                               = 1 << 30, */
+/*     BARRIER_bit                                       = 1 << 31, */
+    SQ_TEX_WORD1                                          = 0x00008dfc,
+       SQ_TEX_WORD1__DST_GPR_mask                        = 0x7f << 0,
+       SQ_TEX_WORD1__DST_GPR_shift                       = 0,
+       SQ_TEX_WORD1__DST_REL_bit                         = 1 << 7,
+       SQ_TEX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,
+       SQ_TEX_WORD1__DST_SEL_X_shift                     = 9,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+           SQ_SEL_MASK                                   = 0x07,
+       SQ_TEX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,
+       SQ_TEX_WORD1__DST_SEL_Y_shift                     = 12,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SQ_TEX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,
+       SQ_TEX_WORD1__DST_SEL_Z_shift                     = 15,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SQ_TEX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,
+       SQ_TEX_WORD1__DST_SEL_W_shift                     = 18,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SQ_TEX_WORD1__LOD_BIAS_mask                       = 0x7f << 21,
+       SQ_TEX_WORD1__LOD_BIAS_shift                      = 21,
+       COORD_TYPE_X_bit                                  = 1 << 28,
+       COORD_TYPE_Y_bit                                  = 1 << 29,
+       COORD_TYPE_Z_bit                                  = 1 << 30,
+       COORD_TYPE_W_bit                                  = 1 << 31,
+    SQ_VTX_WORD0                                          = 0x00008dfc,
+       VTX_INST_mask                                     = 0x1f << 0,
+       VTX_INST_shift                                    = 0,
+           SQ_VTX_INST_FETCH                             = 0x00,
+           SQ_VTX_INST_SEMANTIC                          = 0x01,
+       FETCH_TYPE_mask                                   = 0x03 << 5,
+       FETCH_TYPE_shift                                  = 5,
+           SQ_VTX_FETCH_VERTEX_DATA                      = 0x00,
+           SQ_VTX_FETCH_INSTANCE_DATA                    = 0x01,
+           SQ_VTX_FETCH_NO_INDEX_OFFSET                  = 0x02,
+       FETCH_WHOLE_QUAD_bit                              = 1 << 7,
+       BUFFER_ID_mask                                    = 0xff << 8,
+       BUFFER_ID_shift                                   = 8,
+       SRC_GPR_mask                                      = 0x7f << 16,
+       SRC_GPR_shift                                     = 16,
+       SRC_REL_bit                                       = 1 << 23,
+       SQ_VTX_WORD0__SRC_SEL_X_mask                      = 0x03 << 24,
+       SQ_VTX_WORD0__SRC_SEL_X_shift                     = 24,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+       MEGA_FETCH_COUNT_mask                             = 0x3f << 26,
+       MEGA_FETCH_COUNT_shift                            = 26,
+    SQ_CF_ALLOC_EXPORT_WORD1_SWIZ                         = 0x00008dfc,
+       SEL_X_mask                                        = 0x07 << 0,
+       SEL_X_shift                                       = 0,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SEL_Y_mask                                        = 0x07 << 3,
+       SEL_Y_shift                                       = 3,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SEL_Z_mask                                        = 0x07 << 6,
+       SEL_Z_shift                                       = 6,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SEL_W_mask                                        = 0x07 << 9,
+       SEL_W_shift                                       = 9,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+    SQ_ALU_WORD1                                          = 0x00008dfc,
+       ENCODING_mask                                     = 0x07 << 15,
+       ENCODING_shift                                    = 15,
+       BANK_SWIZZLE_mask                                 = 0x07 << 18,
+       BANK_SWIZZLE_shift                                = 18,
+           SQ_ALU_VEC_012                                = 0x00,
+           SQ_ALU_VEC_021                                = 0x01,
+           SQ_ALU_VEC_120                                = 0x02,
+           SQ_ALU_VEC_102                                = 0x03,
+           SQ_ALU_VEC_201                                = 0x04,
+           SQ_ALU_VEC_210                                = 0x05,
+       SQ_ALU_WORD1__DST_GPR_mask                        = 0x7f << 21,
+       SQ_ALU_WORD1__DST_GPR_shift                       = 21,
+       SQ_ALU_WORD1__DST_REL_bit                         = 1 << 28,
+       DST_CHAN_mask                                     = 0x03 << 29,
+       DST_CHAN_shift                                    = 29,
+           CHAN_X                                        = 0x00,
+           CHAN_Y                                        = 0x01,
+           CHAN_Z                                        = 0x02,
+           CHAN_W                                        = 0x03,
+       SQ_ALU_WORD1__CLAMP_bit                           = 1 << 31,
+    SQ_CF_ALU_WORD0                                       = 0x00008dfc,
+       SQ_CF_ALU_WORD0__ADDR_mask                        = 0x3fffff << 0,
+       SQ_CF_ALU_WORD0__ADDR_shift                       = 0,
+       KCACHE_BANK0_mask                                 = 0x0f << 22,
+       KCACHE_BANK0_shift                                = 22,
+       KCACHE_BANK1_mask                                 = 0x0f << 26,
+       KCACHE_BANK1_shift                                = 26,
+       KCACHE_MODE0_mask                                 = 0x03 << 30,
+       KCACHE_MODE0_shift                                = 30,
+/*         SQ_CF_KCACHE_NOP                              = 0x00, */
+/*         SQ_CF_KCACHE_LOCK_1                           = 0x01, */
+/*         SQ_CF_KCACHE_LOCK_2                           = 0x02, */
+/*         SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */
+    SQ_VTX_WORD2                                          = 0x00008dfc,
+       SQ_VTX_WORD2__OFFSET_mask                         = 0xffff << 0,
+       SQ_VTX_WORD2__OFFSET_shift                        = 0,
+       SQ_VTX_WORD2__ENDIAN_SWAP_mask                    = 0x03 << 16,
+       SQ_VTX_WORD2__ENDIAN_SWAP_shift                   = 16,
+           SQ_ENDIAN_NONE                                = 0x00,
+           SQ_ENDIAN_8IN16                               = 0x01,
+           SQ_ENDIAN_8IN32                               = 0x02,
+       CONST_BUF_NO_STRIDE_bit                           = 1 << 18,
+       MEGA_FETCH_bit                                    = 1 << 19,
+       SQ_VTX_WORD2__ALT_CONST_bit                       = 1 << 20,
+    SQ_ALU_WORD1_OP2_V2                                   = 0x00008dfc,
+       SRC0_ABS_bit                                      = 1 << 0,
+       SRC1_ABS_bit                                      = 1 << 1,
+       UPDATE_EXECUTE_MASK_bit                           = 1 << 2,
+       UPDATE_PRED_bit                                   = 1 << 3,
+       WRITE_MASK_bit                                    = 1 << 4,
+       SQ_ALU_WORD1_OP2_V2__OMOD_mask                    = 0x03 << 5,
+       SQ_ALU_WORD1_OP2_V2__OMOD_shift                   = 5,
+           SQ_ALU_OMOD_OFF                               = 0x00,
+           SQ_ALU_OMOD_M2                                = 0x01,
+           SQ_ALU_OMOD_M4                                = 0x02,
+           SQ_ALU_OMOD_D2                                = 0x03,
+       SQ_ALU_WORD1_OP2_V2__ALU_INST_mask                = 0x7ff << 7,
+       SQ_ALU_WORD1_OP2_V2__ALU_INST_shift               = 7,
+           SQ_OP2_INST_ADD                               = 0x00,
+           SQ_OP2_INST_MUL                               = 0x01,
+           SQ_OP2_INST_MUL_IEEE                          = 0x02,
+           SQ_OP2_INST_MAX                               = 0x03,
+           SQ_OP2_INST_MIN                               = 0x04,
+           SQ_OP2_INST_MAX_DX10                          = 0x05,
+           SQ_OP2_INST_MIN_DX10                          = 0x06,
+           SQ_OP2_INST_SETE                              = 0x08,
+           SQ_OP2_INST_SETGT                             = 0x09,
+           SQ_OP2_INST_SETGE                             = 0x0a,
+           SQ_OP2_INST_SETNE                             = 0x0b,
+           SQ_OP2_INST_SETE_DX10                         = 0x0c,
+           SQ_OP2_INST_SETGT_DX10                        = 0x0d,
+           SQ_OP2_INST_SETGE_DX10                        = 0x0e,
+           SQ_OP2_INST_SETNE_DX10                        = 0x0f,
+           SQ_OP2_INST_FRACT                             = 0x10,
+           SQ_OP2_INST_TRUNC                             = 0x11,
+           SQ_OP2_INST_CEIL                              = 0x12,
+           SQ_OP2_INST_RNDNE                             = 0x13,
+           SQ_OP2_INST_FLOOR                             = 0x14,
+           SQ_OP2_INST_MOVA                              = 0x15,
+           SQ_OP2_INST_MOVA_FLOOR                        = 0x16,
+           SQ_OP2_INST_MOVA_INT                          = 0x18,
+           SQ_OP2_INST_MOV                               = 0x19,
+           SQ_OP2_INST_NOP                               = 0x1a,
+           SQ_OP2_INST_PRED_SETGT_UINT                   = 0x1e,
+           SQ_OP2_INST_PRED_SETGE_UINT                   = 0x1f,
+           SQ_OP2_INST_PRED_SETE                         = 0x20,
+           SQ_OP2_INST_PRED_SETGT                        = 0x21,
+           SQ_OP2_INST_PRED_SETGE                        = 0x22,
+           SQ_OP2_INST_PRED_SETNE                        = 0x23,
+           SQ_OP2_INST_PRED_SET_INV                      = 0x24,
+           SQ_OP2_INST_PRED_SET_POP                      = 0x25,
+           SQ_OP2_INST_PRED_SET_CLR                      = 0x26,
+           SQ_OP2_INST_PRED_SET_RESTORE                  = 0x27,
+           SQ_OP2_INST_PRED_SETE_PUSH                    = 0x28,
+           SQ_OP2_INST_PRED_SETGT_PUSH                   = 0x29,
+           SQ_OP2_INST_PRED_SETGE_PUSH                   = 0x2a,
+           SQ_OP2_INST_PRED_SETNE_PUSH                   = 0x2b,
+           SQ_OP2_INST_KILLE                             = 0x2c,
+           SQ_OP2_INST_KILLGT                            = 0x2d,
+           SQ_OP2_INST_KILLGE                            = 0x2e,
+           SQ_OP2_INST_KILLNE                            = 0x2f,
+           SQ_OP2_INST_AND_INT                           = 0x30,
+           SQ_OP2_INST_OR_INT                            = 0x31,
+           SQ_OP2_INST_XOR_INT                           = 0x32,
+           SQ_OP2_INST_NOT_INT                           = 0x33,
+           SQ_OP2_INST_ADD_INT                           = 0x34,
+           SQ_OP2_INST_SUB_INT                           = 0x35,
+           SQ_OP2_INST_MAX_INT                           = 0x36,
+           SQ_OP2_INST_MIN_INT                           = 0x37,
+           SQ_OP2_INST_MAX_UINT                          = 0x38,
+           SQ_OP2_INST_MIN_UINT                          = 0x39,
+           SQ_OP2_INST_SETE_INT                          = 0x3a,
+           SQ_OP2_INST_SETGT_INT                         = 0x3b,
+           SQ_OP2_INST_SETGE_INT                         = 0x3c,
+           SQ_OP2_INST_SETNE_INT                         = 0x3d,
+           SQ_OP2_INST_SETGT_UINT                        = 0x3e,
+           SQ_OP2_INST_SETGE_UINT                        = 0x3f,
+           SQ_OP2_INST_KILLGT_UINT                       = 0x40,
+           SQ_OP2_INST_KILLGE_UINT                       = 0x41,
+           SQ_OP2_INST_PRED_SETE_INT                     = 0x42,
+           SQ_OP2_INST_PRED_SETGT_INT                    = 0x43,
+           SQ_OP2_INST_PRED_SETGE_INT                    = 0x44,
+           SQ_OP2_INST_PRED_SETNE_INT                    = 0x45,
+           SQ_OP2_INST_KILLE_INT                         = 0x46,
+           SQ_OP2_INST_KILLGT_INT                        = 0x47,
+           SQ_OP2_INST_KILLGE_INT                        = 0x48,
+           SQ_OP2_INST_KILLNE_INT                        = 0x49,
+           SQ_OP2_INST_PRED_SETE_PUSH_INT                = 0x4a,
+           SQ_OP2_INST_PRED_SETGT_PUSH_INT               = 0x4b,
+           SQ_OP2_INST_PRED_SETGE_PUSH_INT               = 0x4c,
+           SQ_OP2_INST_PRED_SETNE_PUSH_INT               = 0x4d,
+           SQ_OP2_INST_PRED_SETLT_PUSH_INT               = 0x4e,
+           SQ_OP2_INST_PRED_SETLE_PUSH_INT               = 0x4f,
+           SQ_OP2_INST_DOT4                              = 0x50,
+           SQ_OP2_INST_DOT4_IEEE                         = 0x51,
+           SQ_OP2_INST_CUBE                              = 0x52,
+           SQ_OP2_INST_MAX4                              = 0x53,
+           SQ_OP2_INST_MOVA_GPR_INT                      = 0x60,
+           SQ_OP2_INST_EXP_IEEE                          = 0x61,
+           SQ_OP2_INST_LOG_CLAMPED                       = 0x62,
+           SQ_OP2_INST_LOG_IEEE                          = 0x63,
+           SQ_OP2_INST_RECIP_CLAMPED                     = 0x64,
+           SQ_OP2_INST_RECIP_FF                          = 0x65,
+           SQ_OP2_INST_RECIP_IEEE                        = 0x66,
+           SQ_OP2_INST_RECIPSQRT_CLAMPED                 = 0x67,
+           SQ_OP2_INST_RECIPSQRT_FF                      = 0x68,
+           SQ_OP2_INST_RECIPSQRT_IEEE                    = 0x69,
+           SQ_OP2_INST_SQRT_IEEE                         = 0x6a,
+           SQ_OP2_INST_FLT_TO_INT                        = 0x6b,
+           SQ_OP2_INST_INT_TO_FLT                        = 0x6c,
+           SQ_OP2_INST_UINT_TO_FLT                       = 0x6d,
+           SQ_OP2_INST_SIN                               = 0x6e,
+           SQ_OP2_INST_COS                               = 0x6f,
+           SQ_OP2_INST_ASHR_INT                          = 0x70,
+           SQ_OP2_INST_LSHR_INT                          = 0x71,
+           SQ_OP2_INST_LSHL_INT                          = 0x72,
+           SQ_OP2_INST_MULLO_INT                         = 0x73,
+           SQ_OP2_INST_MULHI_INT                         = 0x74,
+           SQ_OP2_INST_MULLO_UINT                        = 0x75,
+           SQ_OP2_INST_MULHI_UINT                        = 0x76,
+           SQ_OP2_INST_RECIP_INT                         = 0x77,
+           SQ_OP2_INST_RECIP_UINT                        = 0x78,
+           SQ_OP2_INST_FLT_TO_UINT                       = 0x79,
+    SQ_CF_ALLOC_EXPORT_WORD1_BUF                          = 0x00008dfc,
+       ARRAY_SIZE_mask                                   = 0xfff << 0,
+       ARRAY_SIZE_shift                                  = 0,
+       COMP_MASK_mask                                    = 0x0f << 12,
+       COMP_MASK_shift                                   = 12,
+    SQ_CF_WORD0                                           = 0x00008dfc,
+    SQ_CF_ALLOC_EXPORT_WORD0                              = 0x00008dfc,
+       ARRAY_BASE_mask                                   = 0x1fff << 0,
+       ARRAY_BASE_shift                                  = 0,
+       SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask               = 0x03 << 13,
+       SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift              = 13,
+           SQ_EXPORT_PIXEL                               = 0x00,
+           SQ_EXPORT_POS                                 = 0x01,
+           SQ_EXPORT_PARAM                               = 0x02,
+           X_UNUSED_FOR_SX_EXPORTS                       = 0x03,
+       RW_GPR_mask                                       = 0x7f << 15,
+       RW_GPR_shift                                      = 15,
+       RW_REL_bit                                        = 1 << 22,
+       INDEX_GPR_mask                                    = 0x7f << 23,
+       INDEX_GPR_shift                                   = 23,
+       ELEM_SIZE_mask                                    = 0x03 << 30,
+       ELEM_SIZE_shift                                   = 30,
+    SQ_VTX_WORD1                                          = 0x00008dfc,
+       SQ_VTX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,
+       SQ_VTX_WORD1__DST_SEL_X_shift                     = 9,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SQ_VTX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,
+       SQ_VTX_WORD1__DST_SEL_Y_shift                     = 12,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SQ_VTX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,
+       SQ_VTX_WORD1__DST_SEL_Z_shift                     = 15,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       SQ_VTX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,
+       SQ_VTX_WORD1__DST_SEL_W_shift                     = 18,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+/*         SQ_SEL_MASK                                   = 0x07, */
+       USE_CONST_FIELDS_bit                              = 1 << 21,
+       SQ_VTX_WORD1__DATA_FORMAT_mask                    = 0x3f << 22,
+       SQ_VTX_WORD1__DATA_FORMAT_shift                   = 22,
+       SQ_VTX_WORD1__NUM_FORMAT_ALL_mask                 = 0x03 << 28,
+       SQ_VTX_WORD1__NUM_FORMAT_ALL_shift                = 28,
+           SQ_NUM_FORMAT_NORM                            = 0x00,
+           SQ_NUM_FORMAT_INT                             = 0x01,
+           SQ_NUM_FORMAT_SCALED                          = 0x02,
+       SQ_VTX_WORD1__FORMAT_COMP_ALL_bit                 = 1 << 30,
+       SQ_VTX_WORD1__SRF_MODE_ALL_bit                    = 1 << 31,
+    SQ_ALU_WORD1_OP2                                      = 0x00008dfc, /* Same value (0x00008dfc) as the other SQ_*_WORD* entries nearby. Field enumerators follow; NOTE(review): the commented-out ones are presumably already defined elsewhere in this enum - confirm. */
+/*     SRC0_ABS_bit                                      = 1 << 0, */
+/*     SRC1_ABS_bit                                      = 1 << 1, */
+/*     UPDATE_EXECUTE_MASK_bit                           = 1 << 2, */
+/*     UPDATE_PRED_bit                                   = 1 << 3, */
+/*     WRITE_MASK_bit                                    = 1 << 4, */
+       FOG_MERGE_bit                                     = 1 << 5,
+       SQ_ALU_WORD1_OP2__OMOD_mask                       = 0x03 << 6, /* bits 7:6 */
+       SQ_ALU_WORD1_OP2__OMOD_shift                      = 6,
+/*         SQ_ALU_OMOD_OFF                               = 0x00, */
+/*         SQ_ALU_OMOD_M2                                = 0x01, */
+/*         SQ_ALU_OMOD_M4                                = 0x02, */
+/*         SQ_ALU_OMOD_D2                                = 0x03, */
+       SQ_ALU_WORD1_OP2__ALU_INST_mask                   = 0x3ff << 8, /* bits 17:8; the commented-out list below gives unshifted values and is not contiguous (e.g. 0x07, 0x17, 0x1b-0x1d have no entry) */
+       SQ_ALU_WORD1_OP2__ALU_INST_shift                  = 8,
+/*         SQ_OP2_INST_ADD                               = 0x00, */
+/*         SQ_OP2_INST_MUL                               = 0x01, */
+/*         SQ_OP2_INST_MUL_IEEE                          = 0x02, */
+/*         SQ_OP2_INST_MAX                               = 0x03, */
+/*         SQ_OP2_INST_MIN                               = 0x04, */
+/*         SQ_OP2_INST_MAX_DX10                          = 0x05, */
+/*         SQ_OP2_INST_MIN_DX10                          = 0x06, */
+/*         SQ_OP2_INST_SETE                              = 0x08, */
+/*         SQ_OP2_INST_SETGT                             = 0x09, */
+/*         SQ_OP2_INST_SETGE                             = 0x0a, */
+/*         SQ_OP2_INST_SETNE                             = 0x0b, */
+/*         SQ_OP2_INST_SETE_DX10                         = 0x0c, */
+/*         SQ_OP2_INST_SETGT_DX10                        = 0x0d, */
+/*         SQ_OP2_INST_SETGE_DX10                        = 0x0e, */
+/*         SQ_OP2_INST_SETNE_DX10                        = 0x0f, */
+/*         SQ_OP2_INST_FRACT                             = 0x10, */
+/*         SQ_OP2_INST_TRUNC                             = 0x11, */
+/*         SQ_OP2_INST_CEIL                              = 0x12, */
+/*         SQ_OP2_INST_RNDNE                             = 0x13, */
+/*         SQ_OP2_INST_FLOOR                             = 0x14, */
+/*         SQ_OP2_INST_MOVA                              = 0x15, */
+/*         SQ_OP2_INST_MOVA_FLOOR                        = 0x16, */
+/*         SQ_OP2_INST_MOVA_INT                          = 0x18, */
+/*         SQ_OP2_INST_MOV                               = 0x19, */
+/*         SQ_OP2_INST_NOP                               = 0x1a, */
+/*         SQ_OP2_INST_PRED_SETGT_UINT                   = 0x1e, */
+/*         SQ_OP2_INST_PRED_SETGE_UINT                   = 0x1f, */
+/*         SQ_OP2_INST_PRED_SETE                         = 0x20, */
+/*         SQ_OP2_INST_PRED_SETGT                        = 0x21, */
+/*         SQ_OP2_INST_PRED_SETGE                        = 0x22, */
+/*         SQ_OP2_INST_PRED_SETNE                        = 0x23, */
+/*         SQ_OP2_INST_PRED_SET_INV                      = 0x24, */
+/*         SQ_OP2_INST_PRED_SET_POP                      = 0x25, */
+/*         SQ_OP2_INST_PRED_SET_CLR                      = 0x26, */
+/*         SQ_OP2_INST_PRED_SET_RESTORE                  = 0x27, */
+/*         SQ_OP2_INST_PRED_SETE_PUSH                    = 0x28, */
+/*         SQ_OP2_INST_PRED_SETGT_PUSH                   = 0x29, */
+/*         SQ_OP2_INST_PRED_SETGE_PUSH                   = 0x2a, */
+/*         SQ_OP2_INST_PRED_SETNE_PUSH                   = 0x2b, */
+/*         SQ_OP2_INST_KILLE                             = 0x2c, */
+/*         SQ_OP2_INST_KILLGT                            = 0x2d, */
+/*         SQ_OP2_INST_KILLGE                            = 0x2e, */
+/*         SQ_OP2_INST_KILLNE                            = 0x2f, */
+/*         SQ_OP2_INST_AND_INT                           = 0x30, */
+/*         SQ_OP2_INST_OR_INT                            = 0x31, */
+/*         SQ_OP2_INST_XOR_INT                           = 0x32, */
+/*         SQ_OP2_INST_NOT_INT                           = 0x33, */
+/*         SQ_OP2_INST_ADD_INT                           = 0x34, */
+/*         SQ_OP2_INST_SUB_INT                           = 0x35, */
+/*         SQ_OP2_INST_MAX_INT                           = 0x36, */
+/*         SQ_OP2_INST_MIN_INT                           = 0x37, */
+/*         SQ_OP2_INST_MAX_UINT                          = 0x38, */
+/*         SQ_OP2_INST_MIN_UINT                          = 0x39, */
+/*         SQ_OP2_INST_SETE_INT                          = 0x3a, */
+/*         SQ_OP2_INST_SETGT_INT                         = 0x3b, */
+/*         SQ_OP2_INST_SETGE_INT                         = 0x3c, */
+/*         SQ_OP2_INST_SETNE_INT                         = 0x3d, */
+/*         SQ_OP2_INST_SETGT_UINT                        = 0x3e, */
+/*         SQ_OP2_INST_SETGE_UINT                        = 0x3f, */
+/*         SQ_OP2_INST_KILLGT_UINT                       = 0x40, */
+/*         SQ_OP2_INST_KILLGE_UINT                       = 0x41, */
+/*         SQ_OP2_INST_PRED_SETE_INT                     = 0x42, */
+/*         SQ_OP2_INST_PRED_SETGT_INT                    = 0x43, */
+/*         SQ_OP2_INST_PRED_SETGE_INT                    = 0x44, */
+/*         SQ_OP2_INST_PRED_SETNE_INT                    = 0x45, */
+/*         SQ_OP2_INST_KILLE_INT                         = 0x46, */
+/*         SQ_OP2_INST_KILLGT_INT                        = 0x47, */
+/*         SQ_OP2_INST_KILLGE_INT                        = 0x48, */
+/*         SQ_OP2_INST_KILLNE_INT                        = 0x49, */
+/*         SQ_OP2_INST_PRED_SETE_PUSH_INT                = 0x4a, */
+/*         SQ_OP2_INST_PRED_SETGT_PUSH_INT               = 0x4b, */
+/*         SQ_OP2_INST_PRED_SETGE_PUSH_INT               = 0x4c, */
+/*         SQ_OP2_INST_PRED_SETNE_PUSH_INT               = 0x4d, */
+/*         SQ_OP2_INST_PRED_SETLT_PUSH_INT               = 0x4e, */
+/*         SQ_OP2_INST_PRED_SETLE_PUSH_INT               = 0x4f, */
+/*         SQ_OP2_INST_DOT4                              = 0x50, */
+/*         SQ_OP2_INST_DOT4_IEEE                         = 0x51, */
+/*         SQ_OP2_INST_CUBE                              = 0x52, */
+/*         SQ_OP2_INST_MAX4                              = 0x53, */
+/*         SQ_OP2_INST_MOVA_GPR_INT                      = 0x60, */
+/*         SQ_OP2_INST_EXP_IEEE                          = 0x61, */
+/*         SQ_OP2_INST_LOG_CLAMPED                       = 0x62, */
+/*         SQ_OP2_INST_LOG_IEEE                          = 0x63, */
+/*         SQ_OP2_INST_RECIP_CLAMPED                     = 0x64, */
+/*         SQ_OP2_INST_RECIP_FF                          = 0x65, */
+/*         SQ_OP2_INST_RECIP_IEEE                        = 0x66, */
+/*         SQ_OP2_INST_RECIPSQRT_CLAMPED                 = 0x67, */
+/*         SQ_OP2_INST_RECIPSQRT_FF                      = 0x68, */
+/*         SQ_OP2_INST_RECIPSQRT_IEEE                    = 0x69, */
+/*         SQ_OP2_INST_SQRT_IEEE                         = 0x6a, */
+/*         SQ_OP2_INST_FLT_TO_INT                        = 0x6b, */
+/*         SQ_OP2_INST_INT_TO_FLT                        = 0x6c, */
+/*         SQ_OP2_INST_UINT_TO_FLT                       = 0x6d, */
+/*         SQ_OP2_INST_SIN                               = 0x6e, */
+/*         SQ_OP2_INST_COS                               = 0x6f, */
+/*         SQ_OP2_INST_ASHR_INT                          = 0x70, */
+/*         SQ_OP2_INST_LSHR_INT                          = 0x71, */
+/*         SQ_OP2_INST_LSHL_INT                          = 0x72, */
+/*         SQ_OP2_INST_MULLO_INT                         = 0x73, */
+/*         SQ_OP2_INST_MULHI_INT                         = 0x74, */
+/*         SQ_OP2_INST_MULLO_UINT                        = 0x75, */
+/*         SQ_OP2_INST_MULHI_UINT                        = 0x76, */
+/*         SQ_OP2_INST_RECIP_INT                         = 0x77, */
+/*         SQ_OP2_INST_RECIP_UINT                        = 0x78, */
+/*         SQ_OP2_INST_FLT_TO_UINT                       = 0x79, */
+    SQ_CF_WORD1                                           = 0x00008dfc, /* Same value (0x00008dfc) as the other SQ_*_WORD* entries nearby; mask/shift pairs and unshifted field values follow. */
+       POP_COUNT_mask                                    = 0x07 << 0, /* bits 2:0 */
+       POP_COUNT_shift                                   = 0,
+       CF_CONST_mask                                     = 0x1f << 3, /* bits 7:3 */
+       CF_CONST_shift                                    = 3,
+       COND_mask                                         = 0x03 << 8, /* bits 9:8 */
+       COND_shift                                        = 8,
+           SQ_CF_COND_ACTIVE                             = 0x00, /* unshifted values listed under COND; all four 2-bit codes are named */
+           SQ_CF_COND_FALSE                              = 0x01,
+           SQ_CF_COND_BOOL                               = 0x02,
+           SQ_CF_COND_NOT_BOOL                           = 0x03,
+       SQ_CF_WORD1__COUNT_mask                           = 0x07 << 10, /* bits 12:10 */
+       SQ_CF_WORD1__COUNT_shift                          = 10,
+       CALL_COUNT_mask                                   = 0x3f << 13, /* bits 18:13 */
+       CALL_COUNT_shift                                  = 13,
+       COUNT_3_bit                                       = 1 << 19,
+/*     END_OF_PROGRAM_bit                                = 1 << 21, */
+/*     VALID_PIXEL_MODE_bit                              = 1 << 22, */
+       SQ_CF_WORD1__CF_INST_mask                         = 0x7f << 23, /* bits 29:23 */
+       SQ_CF_WORD1__CF_INST_shift                        = 23,
+           SQ_CF_INST_NOP                                = 0x00, /* unshifted values listed under CF_INST; contiguous 0x00-0x18 */
+           SQ_CF_INST_TEX                                = 0x01,
+           SQ_CF_INST_VTX                                = 0x02,
+           SQ_CF_INST_VTX_TC                             = 0x03,
+           SQ_CF_INST_LOOP_START                         = 0x04,
+           SQ_CF_INST_LOOP_END                           = 0x05,
+           SQ_CF_INST_LOOP_START_DX10                    = 0x06,
+           SQ_CF_INST_LOOP_START_NO_AL                   = 0x07,
+           SQ_CF_INST_LOOP_CONTINUE                      = 0x08,
+           SQ_CF_INST_LOOP_BREAK                         = 0x09,
+           SQ_CF_INST_JUMP                               = 0x0a,
+           SQ_CF_INST_PUSH                               = 0x0b,
+           SQ_CF_INST_PUSH_ELSE                          = 0x0c,
+           SQ_CF_INST_ELSE                               = 0x0d,
+           SQ_CF_INST_POP                                = 0x0e,
+           SQ_CF_INST_POP_JUMP                           = 0x0f,
+           SQ_CF_INST_POP_PUSH                           = 0x10,
+           SQ_CF_INST_POP_PUSH_ELSE                      = 0x11,
+           SQ_CF_INST_CALL                               = 0x12,
+           SQ_CF_INST_CALL_FS                            = 0x13,
+           SQ_CF_INST_RETURN                             = 0x14,
+           SQ_CF_INST_EMIT_VERTEX                        = 0x15,
+           SQ_CF_INST_EMIT_CUT_VERTEX                    = 0x16,
+           SQ_CF_INST_CUT_VERTEX                         = 0x17,
+           SQ_CF_INST_KILL                               = 0x18,
+/*     WHOLE_QUAD_MODE_bit                               = 1 << 30, */
+/*     BARRIER_bit                                       = 1 << 31, */
+    SQ_VTX_WORD1_SEM                                      = 0x00008dfc, /* Same value (0x00008dfc) as the other SQ_*_WORD* entries nearby; a single field follows. */
+       SEMANTIC_ID_mask                                  = 0xff << 0, /* bits 7:0 */
+       SEMANTIC_ID_shift                                 = 0,
+    SQ_TEX_WORD0                                          = 0x00008dfc, /* Same value (0x00008dfc) as the other SQ_*_WORD* entries nearby; field enumerators follow. */
+       TEX_INST_mask                                     = 0x1f << 0, /* bits 4:0 */
+       TEX_INST_shift                                    = 0,
+           SQ_TEX_INST_VTX_FETCH                         = 0x00, /* unshifted values listed under TEX_INST; 0x02 and 0x0f have no entry */
+           SQ_TEX_INST_VTX_SEMANTIC                      = 0x01,
+           SQ_TEX_INST_LD                                = 0x03,
+           SQ_TEX_INST_GET_TEXTURE_RESINFO               = 0x04,
+           SQ_TEX_INST_GET_NUMBER_OF_SAMPLES             = 0x05,
+           SQ_TEX_INST_GET_LOD                           = 0x06,
+           SQ_TEX_INST_GET_GRADIENTS_H                   = 0x07,
+           SQ_TEX_INST_GET_GRADIENTS_V                   = 0x08,
+           SQ_TEX_INST_GET_LERP                          = 0x09,
+           SQ_TEX_INST_RESERVED_10                       = 0x0a,
+           SQ_TEX_INST_SET_GRADIENTS_H                   = 0x0b,
+           SQ_TEX_INST_SET_GRADIENTS_V                   = 0x0c,
+           SQ_TEX_INST_PASS                              = 0x0d,
+           X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS           = 0x0e, /* NOTE(review): the only value in this list without the SQ_TEX_INST_ prefix */
+           SQ_TEX_INST_SAMPLE                            = 0x10,
+           SQ_TEX_INST_SAMPLE_L                          = 0x11,
+           SQ_TEX_INST_SAMPLE_LB                         = 0x12,
+           SQ_TEX_INST_SAMPLE_LZ                         = 0x13,
+           SQ_TEX_INST_SAMPLE_G                          = 0x14,
+           SQ_TEX_INST_SAMPLE_G_L                        = 0x15,
+           SQ_TEX_INST_SAMPLE_G_LB                       = 0x16,
+           SQ_TEX_INST_SAMPLE_G_LZ                       = 0x17,
+           SQ_TEX_INST_SAMPLE_C                          = 0x18,
+           SQ_TEX_INST_SAMPLE_C_L                        = 0x19,
+           SQ_TEX_INST_SAMPLE_C_LB                       = 0x1a,
+           SQ_TEX_INST_SAMPLE_C_LZ                       = 0x1b,
+           SQ_TEX_INST_SAMPLE_C_G                        = 0x1c,
+           SQ_TEX_INST_SAMPLE_C_G_L                      = 0x1d,
+           SQ_TEX_INST_SAMPLE_C_G_LB                     = 0x1e,
+           SQ_TEX_INST_SAMPLE_C_G_LZ                     = 0x1f,
+       BC_FRAC_MODE_bit                                  = 1 << 5,
+/*     FETCH_WHOLE_QUAD_bit                              = 1 << 7, */
+       RESOURCE_ID_mask                                  = 0xff << 8, /* bits 15:8 */
+       RESOURCE_ID_shift                                 = 8,
+/*     SRC_GPR_mask                                      = 0x7f << 16, */
+/*     SRC_GPR_shift                                     = 16, */
+/*     SRC_REL_bit                                       = 1 << 23, */
+       SQ_TEX_WORD0__ALT_CONST_bit                       = 1 << 24,
+    SQ_VTX_WORD1_GPR                                      = 0x00008dfc, /* Same value (0x00008dfc) as the other SQ_*_WORD* entries nearby; field names carry the register prefix. */
+       SQ_VTX_WORD1_GPR__DST_GPR_mask                    = 0x7f << 0, /* bits 6:0 */
+       SQ_VTX_WORD1_GPR__DST_GPR_shift                   = 0,
+       SQ_VTX_WORD1_GPR__DST_REL_bit                     = 1 << 7,
+    SQ_ALU_WORD0                                          = 0x00008dfc, /* Same value (0x00008dfc) as the other SQ_*_WORD* entries nearby. SRC0 and SRC1 have parallel SEL/REL/CHAN/NEG fields; NOTE(review): the commented-out value lists are presumably defined elsewhere in this enum - confirm. */
+       SRC0_SEL_mask                                     = 0x1ff << 0, /* bits 8:0 */
+       SRC0_SEL_shift                                    = 0,
+/*         SQ_ALU_SRC_0                                  = 0xf8, */
+/*         SQ_ALU_SRC_1                                  = 0xf9, */
+/*         SQ_ALU_SRC_1_INT                              = 0xfa, */
+/*         SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/*         SQ_ALU_SRC_0_5                                = 0xfc, */
+/*         SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/*         SQ_ALU_SRC_PV                                 = 0xfe, */
+/*         SQ_ALU_SRC_PS                                 = 0xff, */
+       SRC0_REL_bit                                      = 1 << 9,
+       SRC0_CHAN_mask                                    = 0x03 << 10, /* bits 11:10 */
+       SRC0_CHAN_shift                                   = 10,
+/*         SQ_CHAN_X                                     = 0x00, */
+/*         SQ_CHAN_Y                                     = 0x01, */
+/*         SQ_CHAN_Z                                     = 0x02, */
+/*         SQ_CHAN_W                                     = 0x03, */
+       SRC0_NEG_bit                                      = 1 << 12,
+       SRC1_SEL_mask                                     = 0x1ff << 13, /* bits 21:13 */
+       SRC1_SEL_shift                                    = 13,
+/*         SQ_ALU_SRC_0                                  = 0xf8, */
+/*         SQ_ALU_SRC_1                                  = 0xf9, */
+/*         SQ_ALU_SRC_1_INT                              = 0xfa, */
+/*         SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/*         SQ_ALU_SRC_0_5                                = 0xfc, */
+/*         SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/*         SQ_ALU_SRC_PV                                 = 0xfe, */
+/*         SQ_ALU_SRC_PS                                 = 0xff, */
+       SRC1_REL_bit                                      = 1 << 22,
+       SRC1_CHAN_mask                                    = 0x03 << 23, /* bits 24:23 */
+       SRC1_CHAN_shift                                   = 23,
+/*         SQ_CHAN_X                                     = 0x00, */
+/*         SQ_CHAN_Y                                     = 0x01, */
+/*         SQ_CHAN_Z                                     = 0x02, */
+/*         SQ_CHAN_W                                     = 0x03, */
+       SRC1_NEG_bit                                      = 1 << 25,
+       INDEX_MODE_mask                                   = 0x07 << 26, /* bits 28:26 */
+       INDEX_MODE_shift                                  = 26,
+           SQ_INDEX_AR_X                                 = 0x00, /* unshifted values listed under INDEX_MODE; 0x05-0x07 have no entry */
+           SQ_INDEX_AR_Y                                 = 0x01,
+           SQ_INDEX_AR_Z                                 = 0x02,
+           SQ_INDEX_AR_W                                 = 0x03,
+           SQ_INDEX_LOOP                                 = 0x04,
+       PRED_SEL_mask                                     = 0x03 << 29, /* bits 30:29 */
+       PRED_SEL_shift                                    = 29,
+           SQ_PRED_SEL_OFF                               = 0x00, /* unshifted values listed under PRED_SEL; 0x01 has no entry */
+           SQ_PRED_SEL_ZERO                              = 0x02,
+           SQ_PRED_SEL_ONE                               = 0x03,
+       LAST_bit                                          = 1 << 31, /* NOTE(review): 1 << 31 is not representable in a 32-bit signed int (C11 6.5.7p4); works only by compiler leniency - consider whether the generator should emit a different form */
+    SX_EXPORT_BUFFER_SIZES                                = 0x0000900c, /* Three 8-bit size fields packed into bits 23:0. */
+       COLOR_BUFFER_SIZE_mask                            = 0xff << 0, /* bits 7:0 */
+       COLOR_BUFFER_SIZE_shift                           = 0,
+       POSITION_BUFFER_SIZE_mask                         = 0xff << 8, /* bits 15:8 */
+       POSITION_BUFFER_SIZE_shift                        = 8,
+       SMX_BUFFER_SIZE_mask                              = 0xff << 16, /* bits 23:16 */
+       SMX_BUFFER_SIZE_shift                             = 16,
+    SX_MEMORY_EXPORT_BASE                                 = 0x00009010, /* no field enumerators are listed for this entry */
+    SX_MEMORY_EXPORT_SIZE                                 = 0x00009014, /* no field enumerators are listed for this entry */
+    SPI_CONFIG_CNTL                                       = 0x00009100, /* Field enumerators follow: mask/shift pairs plus single-bit flags. */
+       GPR_WRITE_PRIORITY_mask                           = 0x1f << 0, /* bits 4:0 */
+       GPR_WRITE_PRIORITY_shift                          = 0,
+           X_PRIORITY_ORDER                              = 0x00, /* unshifted values listed under GPR_WRITE_PRIORITY; only 0x00 and 0x01 are named for this 5-bit field */
+           X_PRIORITY_ORDER_VS                           = 0x01,
+       DISABLE_INTERP_1_bit                              = 1 << 5,
+       DEBUG_THREAD_TYPE_SEL_mask                        = 0x03 << 6, /* bits 7:6 */
+       DEBUG_THREAD_TYPE_SEL_shift                       = 6,
+       DEBUG_GROUP_SEL_mask                              = 0x1f << 8, /* bits 12:8 */
+       DEBUG_GROUP_SEL_shift                             = 8,
+       DEBUG_GRBM_OVERRIDE_bit                           = 1 << 13,
+    SPI_CONFIG_CNTL_1                                     = 0x0000913c, /* One 4-bit delay selector plus one flag bit. */
+       VTX_DONE_DELAY_mask                               = 0x0f << 0, /* bits 3:0 */
+       VTX_DONE_DELAY_shift                              = 0,
+           X_DELAY_10_CLKS                               = 0x00, /* encoding is not monotonic: codes 0x00-0x07 name 10..17 clocks, codes 0x08-0x0f name 2..9 clocks */
+           X_DELAY_11_CLKS                               = 0x01,
+           X_DELAY_12_CLKS                               = 0x02,
+           X_DELAY_13_CLKS                               = 0x03,
+           X_DELAY_14_CLKS                               = 0x04,
+           X_DELAY_15_CLKS                               = 0x05,
+           X_DELAY_16_CLKS                               = 0x06,
+           X_DELAY_17_CLKS                               = 0x07,
+           X_DELAY_2_CLKS                                = 0x08,
+           X_DELAY_3_CLKS                                = 0x09,
+           X_DELAY_4_CLKS                                = 0x0a,
+           X_DELAY_5_CLKS                                = 0x0b,
+           X_DELAY_6_CLKS                                = 0x0c,
+           X_DELAY_7_CLKS                                = 0x0d,
+           X_DELAY_8_CLKS                                = 0x0e,
+           X_DELAY_9_CLKS                                = 0x0f,
+       INTERP_ONE_PRIM_PER_ROW_bit                       = 1 << 4,
+    TD_FILTER4                                            = 0x00009400, /* Two 11-bit weight fields, a 4-bit phase field and two flag bits. */
+       WEIGHT_1_mask                                     = 0x7ff << 0, /* bits 10:0 */
+       WEIGHT_1_shift                                    = 0,
+       WEIGHT_0_mask                                     = 0x7ff << 11, /* bits 21:11; note WEIGHT_0 sits above WEIGHT_1 */
+       WEIGHT_0_shift                                    = 11,
+       WEIGHT_PAIR_bit                                   = 1 << 22,
+       PHASE_mask                                        = 0x0f << 23, /* bits 26:23 */
+       PHASE_shift                                       = 23,
+       DIRECTION_bit                                     = 1 << 27,
+    TD_FILTER4_1                                          = 0x00009404, /* Immediately follows TD_FILTER4 (0x00009400). */
+       TD_FILTER4_1_num                                  = 35, /* NOTE(review): presumably the number of consecutive instances of this register - confirm against the generator */
+/*     WEIGHT_1_mask                                     = 0x7ff << 0, */
+/*     WEIGHT_1_shift                                    = 0, */
+/*     WEIGHT_0_mask                                     = 0x7ff << 11, */
+/*     WEIGHT_0_shift                                    = 11, */
+    TD_CNTL                                               = 0x00009490, /* Two 2-bit fields; bits 3:2 are not named here. */
+       SYNC_PHASE_SH_mask                                = 0x03 << 0, /* bits 1:0 */
+       SYNC_PHASE_SH_shift                               = 0,
+       SYNC_PHASE_VC_SMX_mask                            = 0x03 << 4, /* bits 5:4 */
+       SYNC_PHASE_VC_SMX_shift                           = 4,
+    TD0_CNTL                                              = 0x00009494, /* Only one field (ID_OVERRIDE) is named for this entry. */
+       TD0_CNTL_num                                      = 4, /* NOTE(review): presumably the number of consecutive instances of this register - confirm */
+       ID_OVERRIDE_mask                                  = 0x03 << 28, /* bits 29:28; the same name appears commented out under TA0_CNTL..TA3_CNTL */
+       ID_OVERRIDE_shift                                 = 28,
+    TD0_STATUS                                            = 0x000094a4, /* Only the top bit is named for this entry. */
+       TD0_STATUS_num                                    = 4, /* NOTE(review): presumably the number of consecutive instances of this register - confirm */
+       BUSY_bit                                          = 1 << 31, /* NOTE(review): 1 << 31 is not representable in a 32-bit signed int (C11 6.5.7p4). The same name appears commented out under TA0_STATUS..TA3_STATUS. */
+    TA_CNTL                                               = 0x00009504, /* Four credit-count fields: three 5-bit, one 10-bit. */
+       GRADIENT_CREDIT_mask                              = 0x1f << 0, /* bits 4:0 */
+       GRADIENT_CREDIT_shift                             = 0,
+       WALKER_CREDIT_mask                                = 0x1f << 8, /* bits 12:8 */
+       WALKER_CREDIT_shift                               = 8,
+       ALIGNER_CREDIT_mask                               = 0x1f << 16, /* bits 20:16 */
+       ALIGNER_CREDIT_shift                              = 16,
+       TD_FIFO_CREDIT_mask                               = 0x3ff << 22, /* bits 31:22; NOTE(review): the shift reaches bit 31, so the result is not representable in a 32-bit signed int (C11 6.5.7p4) */
+       TD_FIFO_CREDIT_shift                              = 22,
+    TA_CNTL_AUX                                           = 0x00009508, /* Single-bit flags only: bit 0, bits 26:24 and bit 31. */
+       DISABLE_CUBE_WRAP_bit                             = 1 << 0,
+       SYNC_GRADIENT_bit                                 = 1 << 24,
+       SYNC_WALKER_bit                                   = 1 << 25,
+       SYNC_ALIGNER_bit                                  = 1 << 26,
+       BILINEAR_PRECISION_bit                            = 1 << 31, /* NOTE(review): 1 << 31 is not representable in a 32-bit signed int (C11 6.5.7p4) */
+    TA0_CNTL                                              = 0x00009510, /* TA0..TA3_CNTL are four entries 4 bytes apart (0x9510..0x951c). Each lists ID_OVERRIDE commented out; the live definition is under TD0_CNTL. */
+/*     ID_OVERRIDE_mask                                  = 0x03 << 28, */
+/*     ID_OVERRIDE_shift                                 = 28, */
+    TA1_CNTL                                              = 0x00009514,
+/*     ID_OVERRIDE_mask                                  = 0x03 << 28, */
+/*     ID_OVERRIDE_shift                                 = 28, */
+    TA2_CNTL                                              = 0x00009518,
+/*     ID_OVERRIDE_mask                                  = 0x03 << 28, */
+/*     ID_OVERRIDE_shift                                 = 28, */
+    TA3_CNTL                                              = 0x0000951c,
+/*     ID_OVERRIDE_mask                                  = 0x03 << 28, */
+/*     ID_OVERRIDE_shift                                 = 28, */
+    TA0_STATUS                                            = 0x00009520, /* Single-bit status flags in bits 30:12; bits 15, 19, 23 and 26 are not named. TA1..TA3_STATUS repeat these names commented out. */
+       FG_PFIFO_EMPTYB_bit                               = 1 << 12,
+       FG_LFIFO_EMPTYB_bit                               = 1 << 13,
+       FG_SFIFO_EMPTYB_bit                               = 1 << 14,
+       FL_PFIFO_EMPTYB_bit                               = 1 << 16,
+       FL_LFIFO_EMPTYB_bit                               = 1 << 17,
+       FL_SFIFO_EMPTYB_bit                               = 1 << 18,
+       FA_PFIFO_EMPTYB_bit                               = 1 << 20,
+       FA_LFIFO_EMPTYB_bit                               = 1 << 21,
+       FA_SFIFO_EMPTYB_bit                               = 1 << 22,
+       IN_BUSY_bit                                       = 1 << 24,
+       FG_BUSY_bit                                       = 1 << 25,
+       FL_BUSY_bit                                       = 1 << 27,
+       TA_BUSY_bit                                       = 1 << 28,
+       FA_BUSY_bit                                       = 1 << 29,
+       AL_BUSY_bit                                       = 1 << 30,
+/*     BUSY_bit                                          = 1 << 31, */
+    TA1_STATUS                                            = 0x00009524, /* Bit names below are commented out; the same names and positions are defined live under TA0_STATUS (BUSY_bit under TD0_STATUS). */
+/*     FG_PFIFO_EMPTYB_bit                               = 1 << 12, */
+/*     FG_LFIFO_EMPTYB_bit                               = 1 << 13, */
+/*     FG_SFIFO_EMPTYB_bit                               = 1 << 14, */
+/*     FL_PFIFO_EMPTYB_bit                               = 1 << 16, */
+/*     FL_LFIFO_EMPTYB_bit                               = 1 << 17, */
+/*     FL_SFIFO_EMPTYB_bit                               = 1 << 18, */
+/*     FA_PFIFO_EMPTYB_bit                               = 1 << 20, */
+/*     FA_LFIFO_EMPTYB_bit                               = 1 << 21, */
+/*     FA_SFIFO_EMPTYB_bit                               = 1 << 22, */
+/*     IN_BUSY_bit                                       = 1 << 24, */
+/*     FG_BUSY_bit                                       = 1 << 25, */
+/*     FL_BUSY_bit                                       = 1 << 27, */
+/*     TA_BUSY_bit                                       = 1 << 28, */
+/*     FA_BUSY_bit                                       = 1 << 29, */
+/*     AL_BUSY_bit                                       = 1 << 30, */
+/*     BUSY_bit                                          = 1 << 31, */
+    TA2_STATUS                                            = 0x00009528, /* Bit names below are commented out; the same names and positions are defined live under TA0_STATUS (BUSY_bit under TD0_STATUS). */
+/*     FG_PFIFO_EMPTYB_bit                               = 1 << 12, */
+/*     FG_LFIFO_EMPTYB_bit                               = 1 << 13, */
+/*     FG_SFIFO_EMPTYB_bit                               = 1 << 14, */
+/*     FL_PFIFO_EMPTYB_bit                               = 1 << 16, */
+/*     FL_LFIFO_EMPTYB_bit                               = 1 << 17, */
+/*     FL_SFIFO_EMPTYB_bit                               = 1 << 18, */
+/*     FA_PFIFO_EMPTYB_bit                               = 1 << 20, */
+/*     FA_LFIFO_EMPTYB_bit                               = 1 << 21, */
+/*     FA_SFIFO_EMPTYB_bit                               = 1 << 22, */
+/*     IN_BUSY_bit                                       = 1 << 24, */
+/*     FG_BUSY_bit                                       = 1 << 25, */
+/*     FL_BUSY_bit                                       = 1 << 27, */
+/*     TA_BUSY_bit                                       = 1 << 28, */
+/*     FA_BUSY_bit                                       = 1 << 29, */
+/*     AL_BUSY_bit                                       = 1 << 30, */
+/*     BUSY_bit                                          = 1 << 31, */
+    TA3_STATUS                                            = 0x0000952c, /* Bit names below are commented out; the same names and positions are defined live under TA0_STATUS (BUSY_bit under TD0_STATUS). */
+/*     FG_PFIFO_EMPTYB_bit                               = 1 << 12, */
+/*     FG_LFIFO_EMPTYB_bit                               = 1 << 13, */
+/*     FG_SFIFO_EMPTYB_bit                               = 1 << 14, */
+/*     FL_PFIFO_EMPTYB_bit                               = 1 << 16, */
+/*     FL_LFIFO_EMPTYB_bit                               = 1 << 17, */
+/*     FL_SFIFO_EMPTYB_bit                               = 1 << 18, */
+/*     FA_PFIFO_EMPTYB_bit                               = 1 << 20, */
+/*     FA_LFIFO_EMPTYB_bit                               = 1 << 21, */
+/*     FA_SFIFO_EMPTYB_bit                               = 1 << 22, */
+/*     IN_BUSY_bit                                       = 1 << 24, */
+/*     FG_BUSY_bit                                       = 1 << 25, */
+/*     FL_BUSY_bit                                       = 1 << 27, */
+/*     TA_BUSY_bit                                       = 1 << 28, */
+/*     FA_BUSY_bit                                       = 1 << 29, */
+/*     AL_BUSY_bit                                       = 1 << 30, */
+/*     BUSY_bit                                          = 1 << 31, */
+    TC_STATUS                                             = 0x00009600, /* only bit 0 is named for this entry */
+       TC_BUSY_bit                                       = 1 << 0,
+    TC_INVALIDATE                                         = 0x00009604, /* only bit 0 is named for this entry */
+       START_bit                                         = 1 << 0,
+    TC_CNTL                                               = 0x00009608, /* Flag bits plus two multi-bit fields (L2_SIZE, PARTITION_MODE). */
+       FORCE_HIT_bit                                     = 1 << 0,
+       FORCE_MISS_bit                                    = 1 << 1,
+       L2_SIZE_mask                                      = 0x0f << 5, /* bits 8:5; only codes 0x00-0x07 of this 4-bit field are named below */
+       L2_SIZE_shift                                     = 5,
+           _256K                                         = 0x00, /* NOTE(review): these names start with an underscore, which C reserves at file scope (C11 7.1.3); names step down by 32K per code */
+           _224K                                         = 0x01,
+           _192K                                         = 0x02,
+           _160K                                         = 0x03,
+           _128K                                         = 0x04,
+           _96K                                          = 0x05,
+           _64K                                          = 0x06,
+           _32K                                          = 0x07,
+       L2_DISABLE_LATE_HIT_bit                           = 1 << 9,
+       DISABLE_VERT_PERF_bit                             = 1 << 10,
+       DISABLE_INVAL_BUSY_bit                            = 1 << 11,
+       DISABLE_INVAL_SAME_SURFACE_bit                    = 1 << 12,
+       PARTITION_MODE_mask                               = 0x03 << 13, /* bits 14:13; only code 0x00 is named below */
+       PARTITION_MODE_shift                              = 13,
+           X_VERTEX                                      = 0x00,
+       MISS_ARB_MODE_bit                                 = 1 << 15,
+       HIT_ARB_MODE_bit                                  = 1 << 16,
+       DISABLE_WRITE_DELAY_bit                           = 1 << 17,
+       HIT_FIFO_DEPTH_bit                                = 1 << 18,
+    VC_CNTL                                               = 0x00009700, /* Flag bits plus four multi-bit fields, each with a list of unshifted codes. */
+       L2_INVALIDATE_bit                                 = 1 << 0,
+       RESERVED_bit                                      = 1 << 1,
+       CC_FORCE_MISS_bit                                 = 1 << 2,
+       MI_CHAN_SEL_mask                                  = 0x03 << 3, /* bits 4:3 */
+       MI_CHAN_SEL_shift                                 = 3,
+           X_MC0_USES_CH_0_1                             = 0x00,
+           X_MC0_USES_CH_0_3                             = 0x01,
+           X_VC_MC0_IS_ACTIVE                            = 0x02,
+           X_VC_MC1_IS_DISABLED                          = 0x03,
+       MI_STEER_DISABLE_bit                              = 1 << 5,
+       MI_CREDIT_CTR_mask                                = 0x0f << 6, /* bits 9:6 */
+       MI_CREDIT_CTR_shift                               = 6,
+       MI_CREDIT_WE_bit                                  = 1 << 10,
+       MI_REQ_STALL_THLD_mask                            = 0x07 << 11, /* bits 13:11 */
+       MI_REQ_STALL_THLD_shift                           = 11,
+           X_LATENCY_EXCEEDS_399_CLOCKS                  = 0x00, /* named thresholds rise by 16 clocks per code (399..511) */
+           X_LATENCY_EXCEEDS_415_CLOCKS                  = 0x01,
+           X_LATENCY_EXCEEDS_431_CLOCKS                  = 0x02,
+           X_LATENCY_EXCEEDS_447_CLOCKS                  = 0x03,
+           X_LATENCY_EXCEEDS_463_CLOCKS                  = 0x04,
+           X_LATENCY_EXCEEDS_479_CLOCKS                  = 0x05,
+           X_LATENCY_EXCEEDS_495_CLOCKS                  = 0x06,
+           X_LATENCY_EXCEEDS_511_CLOCKS                  = 0x07,
+       VC_CNTL__MI_TIMESTAMP_RES_mask                    = 0x1f << 14, /* bits 18:14; only codes 0x00-0x0f of this 5-bit field are named below */
+       VC_CNTL__MI_TIMESTAMP_RES_shift                   = 14,
+           X_1X_SYSTEM_CLOCK                             = 0x00, /* named multiplier doubles with each code */
+           X_2X_SYSTEM_CLOCK                             = 0x01,
+           X_4X_SYSTEM_CLOCK                             = 0x02,
+           X_8X_SYSTEM_CLOCK                             = 0x03,
+           X_16X_SYSTEM_CLOCK                            = 0x04,
+           X_32X_SYSTEM_CLOCK                            = 0x05,
+           X_64X_SYSTEM_CLOCK                            = 0x06,
+           X_128X_SYSTEM_CLOCK                           = 0x07,
+           X_256X_SYSTEM_CLOCK                           = 0x08,
+           X_512X_SYSTEM_CLOCK                           = 0x09,
+           X_1024X_SYSTEM_CLOCK                          = 0x0a,
+           X_2048X_SYSTEM_CLOCK                          = 0x0b,
+           X_4092X_SYSTEM_CLOCK                          = 0x0c, /* NOTE(review): name says 4092X but the doubling sequence (2048X, ..., 8192X) suggests 4096X; likely a typo in the generated name - renaming would affect users, so confirm first */
+           X_8192X_SYSTEM_CLOCK                          = 0x0d,
+           X_16384X_SYSTEM_CLOCK                         = 0x0e,
+           X_32768X_SYSTEM_CLOCK                         = 0x0f,
+    VC_CNTL_STATUS                                        = 0x00009704, /* presumably the register's byte offset (TODO confirm); the indented entries are single-bit flags at bits 0..3 */
+       RP_BUSY_bit                                       = 1 << 0,
+       RG_BUSY_bit                                       = 1 << 1,
+       VC_BUSY_bit                                       = 1 << 2,
+       CLAMP_DETECT_bit                                  = 1 << 3,
+    VC_CONFIG                                             = 0x00009718, /* presumably the register's byte offset (TODO confirm); one flag bit plus two 3-bit fields follow */
+       WRITE_DIS_bit                                     = 1 << 0,
+       GPR_DATA_PHASE_ADJ_mask                           = 0x07 << 1, /* 3-bit field at bits 1..3; only values 0..3 are named below */
+       GPR_DATA_PHASE_ADJ_shift                          = 1,
+           X_LATENCY_BASE_0_CYCLES                       = 0x00,
+           X_LATENCY_BASE_1_CYCLES                       = 0x01,
+           X_LATENCY_BASE_2_CYCLES                       = 0x02,
+           X_LATENCY_BASE_3_CYCLES                       = 0x03,
+       TD_SIMD_SYNC_ADJ_mask                             = 0x07 << 4, /* 3-bit field at bits 4..6; all eight values are named below */
+       TD_SIMD_SYNC_ADJ_shift                            = 4,
+           X_0_CYCLES_DELAY                              = 0x00,
+           X_1_CYCLES_DELAY                              = 0x01,
+           X_2_CYCLES_DELAY                              = 0x02,
+           X_3_CYCLES_DELAY                              = 0x03,
+           X_4_CYCLES_DELAY                              = 0x04,
+           X_5_CYCLES_DELAY                              = 0x05,
+           X_6_CYCLES_DELAY                              = 0x06,
+           X_7_CYCLES_DELAY                              = 0x07,
+    SMX_DC_CTL0                                           = 0x0000a020, /* presumably the register's byte offset (TODO confirm); flags and fields below cover bits 0..31 except bit 9 */
+       WR_GATHER_STREAM0_bit                             = 1 << 0,
+       WR_GATHER_STREAM1_bit                             = 1 << 1,
+       WR_GATHER_STREAM2_bit                             = 1 << 2,
+       WR_GATHER_STREAM3_bit                             = 1 << 3,
+       WR_GATHER_SCRATCH_bit                             = 1 << 4,
+       WR_GATHER_REDUC_BUF_bit                           = 1 << 5,
+       WR_GATHER_RING_BUF_bit                            = 1 << 6,
+       WR_GATHER_F_BUF_bit                               = 1 << 7,
+       DISABLE_CACHES_bit                                = 1 << 8,
+       AUTO_FLUSH_INVAL_EN_bit                           = 1 << 10, /* bit 9 has no name in this list */
+       AUTO_FLUSH_EN_bit                                 = 1 << 11,
+       AUTO_FLUSH_CNT_mask                               = 0xffff << 12, /* 16-bit field at bits 12..27 */
+       AUTO_FLUSH_CNT_shift                              = 12,
+       MC_RD_STALL_FACTOR_mask                           = 0x03 << 28, /* 2-bit field at bits 28..29 */
+       MC_RD_STALL_FACTOR_shift                          = 28,
+       MC_WR_STALL_FACTOR_mask                           = 0x03 << 30, /* 2-bit field at bits 30..31. NOTE(review): with a 32-bit int this shift reaches the sign bit and the result does not fit in int; the value depends on compiler handling - confirm */
+       MC_WR_STALL_FACTOR_shift                          = 30,
+    SMX_DC_CTL1                                           = 0x0000a024, /* presumably the register's byte offset (TODO confirm); 4 bytes after SMX_DC_CTL0 */
+       OP_FIFO_SKID_mask                                 = 0x7f << 0, /* 7-bit field at bits 0..6 */
+       OP_FIFO_SKID_shift                                = 0,
+       CACHE_LINE_SIZE_bit                               = 1 << 8,
+       MULTI_FLUSH_MODE_bit                              = 1 << 9,
+       MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask          = 0x0f << 10, /* 4-bit field at bits 10..13 */
+       MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift         = 10,
+       DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit          = 1 << 16, /* bits 14..15 have no name in this list */
+       DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit      = 1 << 17,
+       DISABLE_FLUSH_ES_ALSO_INVALS_bit                  = 1 << 18,
+       DISABLE_FLUSH_GS_ALSO_INVALS_bit                  = 1 << 19,
+    SMX_DC_CTL2                                           = 0x0000a028, /* presumably the register's byte offset (TODO confirm); only single-bit flags follow, at bits 0, 1, 2, 4, 8 and 9 */
+       INVALIDATE_CACHES_bit                             = 1 << 0,
+       CACHES_INVALID_bit                                = 1 << 1,
+       CACHES_DIRTY_bit                                  = 1 << 2,
+       FLUSH_ALL_bit                                     = 1 << 4,
+       FLUSH_GS_THREADS_bit                              = 1 << 8,
+       FLUSH_ES_THREADS_bit                              = 1 << 9,
+    SMX_DC_MC_INTF_CTL                                    = 0x0000a02c, /* presumably the register's byte offset (TODO confirm); two 8-bit fields follow */
+       MC_RD_REQ_CRED_mask                               = 0xff << 0, /* bits 0..7 */
+       MC_RD_REQ_CRED_shift                              = 0,
+       MC_WR_REQ_CRED_mask                               = 0xff << 16, /* bits 16..23 */
+       MC_WR_REQ_CRED_shift                              = 16,
+    TD_PS_SAMPLER0_BORDER_RED                             = 0x0000a400, /* first of four consecutive constants (RED/GREEN/BLUE/ALPHA), 4 bytes apart */
+       TD_PS_SAMPLER0_BORDER_RED_num                     = 18, /* presumably the number of sampler instances (TODO confirm) */
+       TD_PS_SAMPLER0_BORDER_RED_offset                  = 16, /* 16 = 4 components x 4 bytes, so presumably the byte stride from one sampler's group to the next */
+    TD_PS_SAMPLER0_BORDER_GREEN                           = 0x0000a404,
+       TD_PS_SAMPLER0_BORDER_GREEN_num                   = 18,
+       TD_PS_SAMPLER0_BORDER_GREEN_offset                = 16,
+    TD_PS_SAMPLER0_BORDER_BLUE                            = 0x0000a408,
+       TD_PS_SAMPLER0_BORDER_BLUE_num                    = 18,
+       TD_PS_SAMPLER0_BORDER_BLUE_offset                 = 16,
+    TD_PS_SAMPLER0_BORDER_ALPHA                           = 0x0000a40c,
+       TD_PS_SAMPLER0_BORDER_ALPHA_num                   = 18,
+       TD_PS_SAMPLER0_BORDER_ALPHA_offset                = 16,
+    TD_VS_SAMPLER0_BORDER_RED                             = 0x0000a600, /* first of four consecutive constants (RED/GREEN/BLUE/ALPHA), 4 bytes apart */
+       TD_VS_SAMPLER0_BORDER_RED_num                     = 18, /* presumably the number of sampler instances (TODO confirm) */
+       TD_VS_SAMPLER0_BORDER_RED_offset                  = 16, /* 16 = 4 components x 4 bytes, so presumably the byte stride from one sampler's group to the next */
+    TD_VS_SAMPLER0_BORDER_GREEN                           = 0x0000a604,
+       TD_VS_SAMPLER0_BORDER_GREEN_num                   = 18,
+       TD_VS_SAMPLER0_BORDER_GREEN_offset                = 16,
+    TD_VS_SAMPLER0_BORDER_BLUE                            = 0x0000a608,
+       TD_VS_SAMPLER0_BORDER_BLUE_num                    = 18,
+       TD_VS_SAMPLER0_BORDER_BLUE_offset                 = 16,
+    TD_VS_SAMPLER0_BORDER_ALPHA                           = 0x0000a60c,
+       TD_VS_SAMPLER0_BORDER_ALPHA_num                   = 18,
+       TD_VS_SAMPLER0_BORDER_ALPHA_offset                = 16,
+    TD_GS_SAMPLER0_BORDER_RED                             = 0x0000a800, /* first of four consecutive constants (RED/GREEN/BLUE/ALPHA), 4 bytes apart */
+       TD_GS_SAMPLER0_BORDER_RED_num                     = 18, /* presumably the number of sampler instances (TODO confirm) */
+       TD_GS_SAMPLER0_BORDER_RED_offset                  = 16, /* 16 = 4 components x 4 bytes, so presumably the byte stride from one sampler's group to the next */
+    TD_GS_SAMPLER0_BORDER_GREEN                           = 0x0000a804,
+       TD_GS_SAMPLER0_BORDER_GREEN_num                   = 18,
+       TD_GS_SAMPLER0_BORDER_GREEN_offset                = 16,
+    TD_GS_SAMPLER0_BORDER_BLUE                            = 0x0000a808,
+       TD_GS_SAMPLER0_BORDER_BLUE_num                    = 18,
+       TD_GS_SAMPLER0_BORDER_BLUE_offset                 = 16,
+    TD_GS_SAMPLER0_BORDER_ALPHA                           = 0x0000a80c,
+       TD_GS_SAMPLER0_BORDER_ALPHA_num                   = 18,
+       TD_GS_SAMPLER0_BORDER_ALPHA_offset                = 16,
+    TD_PS_SAMPLER0_CLEARTYPE_KERNEL                       = 0x0000aa00, /* presumably the register's byte offset (TODO confirm) */
+       TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num               = 18, /* presumably the instance count; no _offset constant is given for this one */
+       TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask       = 0x07 << 0, /* 3-bit field at bits 0..2 */
+       TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift      = 0,
+       TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask      = 0x07 << 3, /* 3-bit field at bits 3..5 */
+       TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift     = 3,
+    DB_DEPTH_SIZE                                         = 0x00028000, /* presumably the register's byte offset (TODO confirm) */
+       PITCH_TILE_MAX_mask                               = 0x3ff << 0, /* 10-bit field at bits 0..9; the same name appears commented out under CB_COLOR0_SIZE */
+       PITCH_TILE_MAX_shift                              = 0,
+       SLICE_TILE_MAX_mask                               = 0xfffff << 10, /* 20-bit field at bits 10..29 */
+       SLICE_TILE_MAX_shift                              = 10,
+    DB_DEPTH_VIEW                                         = 0x00028004, /* presumably the register's byte offset (TODO confirm) */
+       SLICE_START_mask                                  = 0x7ff << 0, /* 11-bit field at bits 0..10; the same name appears commented out under CB_COLOR0_VIEW */
+       SLICE_START_shift                                 = 0,
+       SLICE_MAX_mask                                    = 0x7ff << 13, /* 11-bit field at bits 13..23 */
+       SLICE_MAX_shift                                   = 13,
+    DB_DEPTH_BASE                                         = 0x0002800c, /* no field constants are listed for this entry */
+    DB_DEPTH_INFO                                         = 0x00028010, /* presumably the register's byte offset (TODO confirm) */
+       DB_DEPTH_INFO__FORMAT_mask                        = 0x07 << 0, /* 3-bit field at bits 0..2; all eight values are named below */
+       DB_DEPTH_INFO__FORMAT_shift                       = 0,
+           DEPTH_INVALID                                 = 0x00,
+           DEPTH_16                                      = 0x01,
+           DEPTH_X8_24                                   = 0x02,
+           DEPTH_8_24                                    = 0x03,
+           DEPTH_X8_24_FLOAT                             = 0x04,
+           DEPTH_8_24_FLOAT                              = 0x05,
+           DEPTH_32_FLOAT                                = 0x06,
+           DEPTH_X24_8_32_FLOAT                          = 0x07,
+       DB_DEPTH_INFO__READ_SIZE_bit                      = 1 << 3,
+       DB_DEPTH_INFO__ARRAY_MODE_mask                    = 0x0f << 15, /* 4-bit field at bits 15..18; only one value is named here */
+       DB_DEPTH_INFO__ARRAY_MODE_shift                   = 15,
+           ARRAY_2D_TILED_THIN1                          = 0x04, /* the same name appears commented out under CB_COLOR0_INFO's ARRAY_MODE field */
+       TILE_SURFACE_ENABLE_bit                           = 1 << 25,
+       TILE_COMPACT_bit                                  = 1 << 26,
+       ZRANGE_PRECISION_bit                              = 1 << 31, /* NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit int; the resulting value depends on compiler handling - confirm */
+    DB_HTILE_DATA_BASE                                    = 0x00028014, /* no field constants are listed for this entry */
+    DB_STENCIL_CLEAR                                      = 0x00028028, /* presumably the register's byte offset (TODO confirm); two 8-bit fields follow */
+       DB_STENCIL_CLEAR__CLEAR_mask                      = 0xff << 0, /* bits 0..7 */
+       DB_STENCIL_CLEAR__CLEAR_shift                     = 0,
+       MIN_mask                                          = 0xff << 16, /* bits 16..23; note the very generic, unprefixed name */
+       MIN_shift                                         = 16,
+    DB_DEPTH_CLEAR                                        = 0x0002802c, /* no field constants are listed for this entry */
+    PA_SC_SCREEN_SCISSOR_TL                               = 0x00028030, /* TL/BR pair, 4 bytes apart; each holds an X field in the low half and a Y field in the high half */
+       PA_SC_SCREEN_SCISSOR_TL__TL_X_mask                = 0x7fff << 0, /* 15-bit field at bits 0..14 */
+       PA_SC_SCREEN_SCISSOR_TL__TL_X_shift               = 0,
+       PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask                = 0x7fff << 16, /* 15-bit field at bits 16..30 */
+       PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift               = 16,
+    PA_SC_SCREEN_SCISSOR_BR                               = 0x00028034,
+       PA_SC_SCREEN_SCISSOR_BR__BR_X_mask                = 0x7fff << 0,
+       PA_SC_SCREEN_SCISSOR_BR__BR_X_shift               = 0,
+       PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask                = 0x7fff << 16,
+       PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift               = 16,
+    CB_COLOR0_BASE                                        = 0x00028040, /* BASE, SIZE and VIEW are 0x20 apart, i.e. 8 x 4 bytes, which matches _num = 8 */
+       CB_COLOR0_BASE_num                                = 8, /* presumably the number of consecutive 4-byte instances (TODO confirm) */
+    CB_COLOR0_SIZE                                        = 0x00028060,
+       CB_COLOR0_SIZE_num                                = 8,
+/*     PITCH_TILE_MAX_mask                               = 0x3ff << 0, */ /* these four names are already defined under DB_DEPTH_SIZE with the same values; presumably disabled here to avoid duplicate enumerators */
+/*     PITCH_TILE_MAX_shift                              = 0, */
+/*     SLICE_TILE_MAX_mask                               = 0xfffff << 10, */
+/*     SLICE_TILE_MAX_shift                              = 10, */
+    CB_COLOR0_VIEW                                        = 0x00028080,
+       CB_COLOR0_VIEW_num                                = 8,
+/*     SLICE_START_mask                                  = 0x7ff << 0, */ /* these four names are already defined under DB_DEPTH_VIEW with the same values; presumably disabled here to avoid duplicate enumerators */
+/*     SLICE_START_shift                                 = 0, */
+/*     SLICE_MAX_mask                                    = 0x7ff << 13, */
+/*     SLICE_MAX_shift                                   = 13, */
+    CB_COLOR0_INFO                                        = 0x000280a0, /* presumably the register's byte offset (TODO confirm); fields below span bits 0..27 */
+       CB_COLOR0_INFO_num                                = 8, /* presumably the number of consecutive 4-byte instances (TODO confirm) */
+       ENDIAN_mask                                       = 0x03 << 0, /* 2-bit field at bits 0..1 */
+       ENDIAN_shift                                      = 0,
+           ENDIAN_NONE                                   = 0x00,
+           ENDIAN_8IN16                                  = 0x01,
+           ENDIAN_8IN32                                  = 0x02,
+           ENDIAN_8IN64                                  = 0x03,
+       CB_COLOR0_INFO__FORMAT_mask                       = 0x3f << 2, /* 6-bit field at bits 2..7; values 0x04 and 0x21 have no name in the list below */
+       CB_COLOR0_INFO__FORMAT_shift                      = 2,
+           COLOR_INVALID                                 = 0x00,
+           COLOR_8                                       = 0x01,
+           COLOR_4_4                                     = 0x02,
+           COLOR_3_3_2                                   = 0x03,
+           COLOR_16                                      = 0x05,
+           COLOR_16_FLOAT                                = 0x06,
+           COLOR_8_8                                     = 0x07,
+           COLOR_5_6_5                                   = 0x08,
+           COLOR_6_5_5                                   = 0x09,
+           COLOR_1_5_5_5                                 = 0x0a,
+           COLOR_4_4_4_4                                 = 0x0b,
+           COLOR_5_5_5_1                                 = 0x0c,
+           COLOR_32                                      = 0x0d,
+           COLOR_32_FLOAT                                = 0x0e,
+           COLOR_16_16                                   = 0x0f,
+           COLOR_16_16_FLOAT                             = 0x10,
+           COLOR_8_24                                    = 0x11,
+           COLOR_8_24_FLOAT                              = 0x12,
+           COLOR_24_8                                    = 0x13,
+           COLOR_24_8_FLOAT                              = 0x14,
+           COLOR_10_11_11                                = 0x15,
+           COLOR_10_11_11_FLOAT                          = 0x16,
+           COLOR_11_11_10                                = 0x17,
+           COLOR_11_11_10_FLOAT                          = 0x18,
+           COLOR_2_10_10_10                              = 0x19,
+           COLOR_8_8_8_8                                 = 0x1a,
+           COLOR_10_10_10_2                              = 0x1b,
+           COLOR_X24_8_32_FLOAT                          = 0x1c,
+           COLOR_32_32                                   = 0x1d,
+           COLOR_32_32_FLOAT                             = 0x1e,
+           COLOR_16_16_16_16                             = 0x1f,
+           COLOR_16_16_16_16_FLOAT                       = 0x20,
+           COLOR_32_32_32_32                             = 0x22,
+           COLOR_32_32_32_32_FLOAT                       = 0x23,
+       CB_COLOR0_INFO__ARRAY_MODE_mask                   = 0x0f << 8, /* 4-bit field at bits 8..11 */
+       CB_COLOR0_INFO__ARRAY_MODE_shift                  = 8,
+           ARRAY_LINEAR_GENERAL                          = 0x00,
+           ARRAY_LINEAR_ALIGNED                          = 0x01,
+/*         ARRAY_2D_TILED_THIN1                          = 0x04, */ /* already defined with the same value under DB_DEPTH_INFO; presumably disabled here to avoid a duplicate enumerator */
+       NUMBER_TYPE_mask                                  = 0x07 << 12, /* 3-bit field at bits 12..14; all eight values are named below */
+       NUMBER_TYPE_shift                                 = 12,
+           NUMBER_UNORM                                  = 0x00,
+           NUMBER_SNORM                                  = 0x01,
+           NUMBER_USCALED                                = 0x02,
+           NUMBER_SSCALED                                = 0x03,
+           NUMBER_UINT                                   = 0x04,
+           NUMBER_SINT                                   = 0x05,
+           NUMBER_SRGB                                   = 0x06,
+           NUMBER_FLOAT                                  = 0x07,
+       CB_COLOR0_INFO__READ_SIZE_bit                     = 1 << 15,
+       COMP_SWAP_mask                                    = 0x03 << 16, /* 2-bit field at bits 16..17 */
+       COMP_SWAP_shift                                   = 16,
+           SWAP_STD                                      = 0x00,
+           SWAP_ALT                                      = 0x01,
+           SWAP_STD_REV                                  = 0x02,
+           SWAP_ALT_REV                                  = 0x03,
+       CB_COLOR0_INFO__TILE_MODE_mask                    = 0x03 << 18, /* 2-bit field at bits 18..19; value 0x03 has no name below */
+       CB_COLOR0_INFO__TILE_MODE_shift                   = 18,
+           TILE_DISABLE                                  = 0x00,
+           TILE_CLEAR_ENABLE                             = 0x01,
+           TILE_FRAG_ENABLE                              = 0x02,
+       BLEND_CLAMP_bit                                   = 1 << 20,
+       CLEAR_COLOR_bit                                   = 1 << 21,
+       BLEND_BYPASS_bit                                  = 1 << 22,
+       BLEND_FLOAT32_bit                                 = 1 << 23,
+       SIMPLE_FLOAT_bit                                  = 1 << 24,
+       CB_COLOR0_INFO__ROUND_MODE_bit                    = 1 << 25,
+/*     TILE_COMPACT_bit                                  = 1 << 26, */ /* already defined with the same value under DB_DEPTH_INFO; presumably disabled here to avoid a duplicate enumerator */
+       SOURCE_FORMAT_bit                                 = 1 << 27,
+    CB_COLOR0_TILE                                        = 0x000280c0, /* TILE, FRAG and MASK are 0x20 apart, i.e. 8 x 4 bytes, which matches _num = 8 */
+       CB_COLOR0_TILE_num                                = 8, /* presumably the number of consecutive 4-byte instances (TODO confirm) */
+    CB_COLOR0_FRAG                                        = 0x000280e0,
+       CB_COLOR0_FRAG_num                                = 8,
+    CB_COLOR0_MASK                                        = 0x00028100,
+       CB_COLOR0_MASK_num                                = 8,
+       CMASK_BLOCK_MAX_mask                              = 0xfff << 0, /* 12-bit field at bits 0..11 */
+       CMASK_BLOCK_MAX_shift                             = 0,
+       FMASK_TILE_MAX_mask                               = 0xfffff << 12, /* 20-bit field at bits 12..31. NOTE(review): with a 32-bit int this shift reaches the sign bit and the result does not fit in int; the value depends on compiler handling - confirm */
+       FMASK_TILE_MAX_shift                              = 12,
+    CB_CLEAR_RED                                          = 0x00028120, /* four consecutive constants, 4 bytes apart (RED, GREEN, BLUE, ALPHA); no field constants are listed for them */
+    CB_CLEAR_GREEN                                        = 0x00028124,
+    CB_CLEAR_BLUE                                         = 0x00028128,
+    CB_CLEAR_ALPHA                                        = 0x0002812c,
+    SQ_ALU_CONST_BUFFER_SIZE_PS_0                         = 0x00028140, /* the PS, VS and GS constants are 0x40 apart, i.e. 16 x 4 bytes, which matches _num = 16 */
+       SQ_ALU_CONST_BUFFER_SIZE_PS_0_num                 = 16, /* presumably the number of consecutive 4-byte instances (TODO confirm) */
+       SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask          = 0x1ff << 0, /* 9-bit field at bits 0..8 */
+       SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_VS_0                         = 0x00028180,
+       SQ_ALU_CONST_BUFFER_SIZE_VS_0_num                 = 16,
+       SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask          = 0x1ff << 0,
+       SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_GS_0                         = 0x000281c0,
+       SQ_ALU_CONST_BUFFER_SIZE_GS_0_num                 = 16,
+       SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask          = 0x1ff << 0,
+       SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift         = 0,
+    PA_SC_WINDOW_OFFSET                                   = 0x00028200, /* presumably the register's byte offset (TODO confirm); two 15-bit fields follow */
+       WINDOW_X_OFFSET_mask                              = 0x7fff << 0, /* bits 0..14 */
+       WINDOW_X_OFFSET_shift                             = 0,
+       WINDOW_Y_OFFSET_mask                              = 0x7fff << 16, /* bits 16..30 */
+       WINDOW_Y_OFFSET_shift                             = 16,
+    PA_SC_WINDOW_SCISSOR_TL                               = 0x00028204, /* TL/BR pair, 4 bytes apart; coordinates here are 14-bit fields (0x3fff) */
+       PA_SC_WINDOW_SCISSOR_TL__TL_X_mask                = 0x3fff << 0, /* bits 0..13 */
+       PA_SC_WINDOW_SCISSOR_TL__TL_X_shift               = 0,
+       PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask                = 0x3fff << 16, /* bits 16..29 */
+       PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift               = 16,
+       WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, /* NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit int; the resulting value depends on compiler handling - confirm. The same name appears commented out under the GENERIC and VPORT scissor TL entries */
+    PA_SC_WINDOW_SCISSOR_BR                               = 0x00028208,
+       PA_SC_WINDOW_SCISSOR_BR__BR_X_mask                = 0x3fff << 0,
+       PA_SC_WINDOW_SCISSOR_BR__BR_X_shift               = 0,
+       PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask                = 0x3fff << 16,
+       PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift               = 16,
+    PA_SC_CLIPRECT_RULE                                   = 0x0002820c, /* presumably the register's byte offset (TODO confirm) */
+       CLIP_RULE_mask                                    = 0xffff << 0, /* 16-bit field at bits 0..15 */
+       CLIP_RULE_shift                                   = 0,
+    PA_SC_CLIPRECT_0_TL                                   = 0x00028210, /* TL/BR pair, 4 bytes apart */
+       PA_SC_CLIPRECT_0_TL_num                           = 4, /* presumably the number of clip rectangles (TODO confirm) */
+       PA_SC_CLIPRECT_0_TL_offset                        = 8, /* 8 = one TL + BR pair of 4 bytes each, so presumably the byte stride between rectangles */
+       PA_SC_CLIPRECT_0_TL__TL_X_mask                    = 0x3fff << 0, /* 14-bit field at bits 0..13 */
+       PA_SC_CLIPRECT_0_TL__TL_X_shift                   = 0,
+       PA_SC_CLIPRECT_0_TL__TL_Y_mask                    = 0x3fff << 16, /* 14-bit field at bits 16..29 */
+       PA_SC_CLIPRECT_0_TL__TL_Y_shift                   = 16,
+    PA_SC_CLIPRECT_0_BR                                   = 0x00028214,
+       PA_SC_CLIPRECT_0_BR_num                           = 4,
+       PA_SC_CLIPRECT_0_BR_offset                        = 8,
+       PA_SC_CLIPRECT_0_BR__BR_X_mask                    = 0x3fff << 0,
+       PA_SC_CLIPRECT_0_BR__BR_X_shift                   = 0,
+       PA_SC_CLIPRECT_0_BR__BR_Y_mask                    = 0x3fff << 16,
+       PA_SC_CLIPRECT_0_BR__BR_Y_shift                   = 16,
+    CB_TARGET_MASK                                        = 0x00028238, /* presumably the register's byte offset (TODO confirm); eight 4-bit fields, TARGETn at bits 4n..4n+3 */
+       TARGET0_ENABLE_mask                               = 0x0f << 0,
+       TARGET0_ENABLE_shift                              = 0,
+       TARGET1_ENABLE_mask                               = 0x0f << 4,
+       TARGET1_ENABLE_shift                              = 4,
+       TARGET2_ENABLE_mask                               = 0x0f << 8,
+       TARGET2_ENABLE_shift                              = 8,
+       TARGET3_ENABLE_mask                               = 0x0f << 12,
+       TARGET3_ENABLE_shift                              = 12,
+       TARGET4_ENABLE_mask                               = 0x0f << 16,
+       TARGET4_ENABLE_shift                              = 16,
+       TARGET5_ENABLE_mask                               = 0x0f << 20,
+       TARGET5_ENABLE_shift                              = 20,
+       TARGET6_ENABLE_mask                               = 0x0f << 24,
+       TARGET6_ENABLE_shift                              = 24,
+       TARGET7_ENABLE_mask                               = 0x0f << 28, /* NOTE(review): with a 32-bit int this shift reaches the sign bit and the result does not fit in int; the value depends on compiler handling - confirm */
+       TARGET7_ENABLE_shift                              = 28,
+    CB_SHADER_MASK                                        = 0x0002823c, /* presumably the register's byte offset (TODO confirm); eight 4-bit fields, OUTPUTn at bits 4n..4n+3 */
+       OUTPUT0_ENABLE_mask                               = 0x0f << 0,
+       OUTPUT0_ENABLE_shift                              = 0,
+       OUTPUT1_ENABLE_mask                               = 0x0f << 4,
+       OUTPUT1_ENABLE_shift                              = 4,
+       OUTPUT2_ENABLE_mask                               = 0x0f << 8,
+       OUTPUT2_ENABLE_shift                              = 8,
+       OUTPUT3_ENABLE_mask                               = 0x0f << 12,
+       OUTPUT3_ENABLE_shift                              = 12,
+       OUTPUT4_ENABLE_mask                               = 0x0f << 16,
+       OUTPUT4_ENABLE_shift                              = 16,
+       OUTPUT5_ENABLE_mask                               = 0x0f << 20,
+       OUTPUT5_ENABLE_shift                              = 20,
+       OUTPUT6_ENABLE_mask                               = 0x0f << 24,
+       OUTPUT6_ENABLE_shift                              = 24,
+       OUTPUT7_ENABLE_mask                               = 0x0f << 28, /* NOTE(review): with a 32-bit int this shift reaches the sign bit and the result does not fit in int; the value depends on compiler handling - confirm */
+       OUTPUT7_ENABLE_shift                              = 28,
+    PA_SC_GENERIC_SCISSOR_TL                              = 0x00028240, /* TL/BR pair, 4 bytes apart; coordinates are 14-bit fields at bits 0..13 (X) and 16..29 (Y) */
+       PA_SC_GENERIC_SCISSOR_TL__TL_X_mask               = 0x3fff << 0,
+       PA_SC_GENERIC_SCISSOR_TL__TL_X_shift              = 0,
+       PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask               = 0x3fff << 16,
+       PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift              = 16,
+/*     WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */ /* already defined with the same value under PA_SC_WINDOW_SCISSOR_TL; presumably disabled here to avoid a duplicate enumerator */
+    PA_SC_GENERIC_SCISSOR_BR                              = 0x00028244,
+       PA_SC_GENERIC_SCISSOR_BR__BR_X_mask               = 0x3fff << 0,
+       PA_SC_GENERIC_SCISSOR_BR__BR_X_shift              = 0,
+       PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask               = 0x3fff << 16,
+       PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift              = 16,
+    PA_SC_VPORT_SCISSOR_0_TL                              = 0x00028250, /* TL/BR pair, 4 bytes apart; 0x28250 + 16 x 8 = 0x282d0, which is exactly PA_SC_VPORT_ZMIN_0 below */
+       PA_SC_VPORT_SCISSOR_0_TL_num                      = 16, /* presumably the number of viewports (TODO confirm) */
+       PA_SC_VPORT_SCISSOR_0_TL_offset                   = 8, /* 8 = one TL + BR pair of 4 bytes each, so presumably the byte stride between viewports */
+       PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask               = 0x3fff << 0, /* 14-bit field at bits 0..13 */
+       PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift              = 0,
+       PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask               = 0x3fff << 16, /* 14-bit field at bits 16..29 */
+       PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift              = 16,
+/*     WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */ /* already defined with the same value under PA_SC_WINDOW_SCISSOR_TL; presumably disabled here to avoid a duplicate enumerator */
+    PA_SC_VPORT_SCISSOR_0_BR                              = 0x00028254,
+       PA_SC_VPORT_SCISSOR_0_BR_num                      = 16,
+       PA_SC_VPORT_SCISSOR_0_BR_offset                   = 8,
+       PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask               = 0x3fff << 0,
+       PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift              = 0,
+       PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask               = 0x3fff << 16,
+       PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift              = 16,
+    PA_SC_VPORT_ZMIN_0                                    = 0x000282d0, /* ZMIN/ZMAX pair, 4 bytes apart, with the same _num = 16 and _offset = 8 pattern; no bit fields are listed */
+       PA_SC_VPORT_ZMIN_0_num                            = 16,
+       PA_SC_VPORT_ZMIN_0_offset                         = 8,
+    PA_SC_VPORT_ZMAX_0                                    = 0x000282d4,
+       PA_SC_VPORT_ZMAX_0_num                            = 16,
+       PA_SC_VPORT_ZMAX_0_offset                         = 8,
+    SX_MISC                                               = 0x00028350, /* presumably the register's byte offset (TODO confirm); a single flag bit is listed */
+       MULTIPASS_bit                                     = 1 << 0,
+    SQ_VTX_SEMANTIC_0                                     = 0x00028380, /* 0x28380 + 32 x 4 = 0x28400, which is exactly the next constant in this list */
+       SQ_VTX_SEMANTIC_0_num                             = 32, /* presumably the number of consecutive 4-byte instances (TODO confirm) */
+/*     SEMANTIC_ID_mask                                  = 0xff << 0, */ /* NOTE(review): presumably disabled because SEMANTIC_ID_* is defined earlier in the file; that definition is not visible in this chunk - confirm */
+/*     SEMANTIC_ID_shift                                 = 0, */
+    VGT_MAX_VTX_INDX                                      = 0x00028400, /* four consecutive constants, 4 bytes apart; no field constants are listed for them */
+    VGT_MIN_VTX_INDX                                      = 0x00028404,
+    VGT_INDX_OFFSET                                       = 0x00028408,
+    VGT_MULTI_PRIM_IB_RESET_INDX                          = 0x0002840c,
+    SX_ALPHA_TEST_CONTROL                                 = 0x00028410, /* presumably the register's byte offset (TODO confirm) */
+       ALPHA_FUNC_mask                                   = 0x07 << 0, /* 3-bit field at bits 0..2; all eight values are named below */
+       ALPHA_FUNC_shift                                  = 0,
+           REF_NEVER                                     = 0x00,
+           REF_LESS                                      = 0x01,
+           REF_EQUAL                                     = 0x02,
+           REF_LEQUAL                                    = 0x03,
+           REF_GREATER                                   = 0x04,
+           REF_NOTEQUAL                                  = 0x05,
+           REF_GEQUAL                                    = 0x06,
+           REF_ALWAYS                                    = 0x07,
+       ALPHA_TEST_ENABLE_bit                             = 1 << 3,
+       ALPHA_TEST_BYPASS_bit                             = 1 << 8, /* bits 4..7 have no name in this list */
+    CB_BLEND_RED                                          = 0x00028414, /* seven consecutive constants, 4 bytes apart: BLEND has RED/GREEN/BLUE/ALPHA, FOG has RED/GREEN/BLUE only; no field constants are listed */
+    CB_BLEND_GREEN                                        = 0x00028418,
+    CB_BLEND_BLUE                                         = 0x0002841c,
+    CB_BLEND_ALPHA                                        = 0x00028420,
+    CB_FOG_RED                                            = 0x00028424,
+    CB_FOG_GREEN                                          = 0x00028428,
+    CB_FOG_BLUE                                           = 0x0002842c,
+    DB_STENCILREFMASK                                     = 0x00028430, /* presumably the register's byte offset (TODO confirm); three 8-bit fields at bits 0..7, 8..15 and 16..23 */
+       STENCILREF_mask                                   = 0xff << 0,
+       STENCILREF_shift                                  = 0,
+       STENCILMASK_mask                                  = 0xff << 8,
+       STENCILMASK_shift                                 = 8,
+       STENCILWRITEMASK_mask                             = 0xff << 16,
+       STENCILWRITEMASK_shift                            = 16,
+    DB_STENCILREFMASK_BF                                  = 0x00028434, /* same three-field layout as DB_STENCILREFMASK, with _BF-suffixed names */
+       STENCILREF_BF_mask                                = 0xff << 0,
+       STENCILREF_BF_shift                               = 0,
+       STENCILMASK_BF_mask                               = 0xff << 8,
+       STENCILMASK_BF_shift                              = 8,
+       STENCILWRITEMASK_BF_mask                          = 0xff << 16,
+       STENCILWRITEMASK_BF_shift                         = 16,
+    SX_ALPHA_REF                                          = 0x00028438, /* no field constants are listed for this entry */
+    PA_CL_VPORT_XSCALE_0                                  = 0x0002843c, /* first of six consecutive constants (X/Y/Z scale and offset), 4 bytes apart */
+       PA_CL_VPORT_XSCALE_0_num                          = 16, /* presumably the number of viewports (TODO confirm) */
+       PA_CL_VPORT_XSCALE_0_offset                       = 24, /* 24 = 6 entries x 4 bytes, so presumably the byte stride from one viewport's group to the next */
+    PA_CL_VPORT_XOFFSET_0                                 = 0x00028440,
+       PA_CL_VPORT_XOFFSET_0_num                         = 16,
+       PA_CL_VPORT_XOFFSET_0_offset                      = 24,
+    PA_CL_VPORT_YSCALE_0                                  = 0x00028444,
+       PA_CL_VPORT_YSCALE_0_num                          = 16,
+       PA_CL_VPORT_YSCALE_0_offset                       = 24,
+    PA_CL_VPORT_YOFFSET_0                                 = 0x00028448,
+       PA_CL_VPORT_YOFFSET_0_num                         = 16,
+       PA_CL_VPORT_YOFFSET_0_offset                      = 24,
+    PA_CL_VPORT_ZSCALE_0                                  = 0x0002844c,
+       PA_CL_VPORT_ZSCALE_0_num                          = 16,
+       PA_CL_VPORT_ZSCALE_0_offset                       = 24,
+    PA_CL_VPORT_ZOFFSET_0                                 = 0x00028450,
+       PA_CL_VPORT_ZOFFSET_0_num                         = 16,
+       PA_CL_VPORT_ZOFFSET_0_offset                      = 24,
+    SPI_VS_OUT_ID_0                                       = 0x00028614, /* presumably the register's byte offset (TODO confirm); four 8-bit fields, SEMANTIC_n at bits 8n..8n+7 */
+       SPI_VS_OUT_ID_0_num                               = 10, /* presumably the number of consecutive 4-byte instances (TODO confirm) */
+       SEMANTIC_0_mask                                   = 0xff << 0,
+       SEMANTIC_0_shift                                  = 0,
+       SEMANTIC_1_mask                                   = 0xff << 8,
+       SEMANTIC_1_shift                                  = 8,
+       SEMANTIC_2_mask                                   = 0xff << 16,
+       SEMANTIC_2_shift                                  = 16,
+       SEMANTIC_3_mask                                   = 0xff << 24, /* NOTE(review): with a 32-bit int this shift reaches the sign bit and the result does not fit in int; the value depends on compiler handling - confirm */
+       SEMANTIC_3_shift                                  = 24,
+    SPI_PS_INPUT_CNTL_0                                   = 0x00028644, /* 0x28644 + 32 x 4 = 0x286c4, which is exactly SPI_VS_OUT_CONFIG, the next constant in this list */
+       SPI_PS_INPUT_CNTL_0_num                           = 32, /* presumably the number of consecutive 4-byte instances (TODO confirm) */
+       SEMANTIC_mask                                     = 0xff << 0, /* 8-bit field at bits 0..7 */
+       SEMANTIC_shift                                    = 0,
+       DEFAULT_VAL_mask                                  = 0x03 << 8, /* 2-bit field at bits 8..9; only value 0 is named below */
+       DEFAULT_VAL_shift                                 = 8,
+           X_0_0F                                        = 0x00,
+       FLAT_SHADE_bit                                    = 1 << 10,
+       SEL_CENTROID_bit                                  = 1 << 11,
+       SEL_LINEAR_bit                                    = 1 << 12,
+       CYL_WRAP_mask                                     = 0x0f << 13, /* 4-bit field at bits 13..16 */
+       CYL_WRAP_shift                                    = 13,
+       PT_SPRITE_TEX_bit                                 = 1 << 17,
+       SEL_SAMPLE_bit                                    = 1 << 18,
+    SPI_VS_OUT_CONFIG                                     = 0x000286c4, /* presumably the register's byte offset (TODO confirm) */
+       VS_PER_COMPONENT_bit                              = 1 << 0,
+       VS_EXPORT_COUNT_mask                              = 0x1f << 1, /* 5-bit field at bits 1..5 */
+       VS_EXPORT_COUNT_shift                             = 1,
+       VS_EXPORTS_FOG_bit                                = 1 << 8, /* bits 6..7 have no name in this list */
+       VS_OUT_FOG_VEC_ADDR_mask                          = 0x1f << 9, /* 5-bit field at bits 9..13 */
+       VS_OUT_FOG_VEC_ADDR_shift                         = 9,
+    SPI_PS_IN_CONTROL_0                                   = 0x000286cc, /* presumably the register's byte offset (TODO confirm); fields below reach up to bit 31 */
+       NUM_INTERP_mask                                   = 0x3f << 0, /* 6-bit field at bits 0..5 */
+       NUM_INTERP_shift                                  = 0,
+       POSITION_ENA_bit                                  = 1 << 8,
+       POSITION_CENTROID_bit                             = 1 << 9,
+       POSITION_ADDR_mask                                = 0x1f << 10, /* 5-bit field at bits 10..14 */
+       POSITION_ADDR_shift                               = 10,
+       PARAM_GEN_mask                                    = 0x0f << 15, /* 4-bit field at bits 15..18 */
+       PARAM_GEN_shift                                   = 15,
+       PARAM_GEN_ADDR_mask                               = 0x7f << 19, /* 7-bit field at bits 19..25 */
+       PARAM_GEN_ADDR_shift                              = 19,
+       BARYC_SAMPLE_CNTL_mask                            = 0x03 << 26, /* 2-bit field at bits 26..27; all four values are named below */
+       BARYC_SAMPLE_CNTL_shift                           = 26,
+           CENTROIDS_ONLY                                = 0x00,
+           CENTERS_ONLY                                  = 0x01,
+           CENTROIDS_AND_CENTERS                         = 0x02,
+           UNDEF                                         = 0x03, /* note the very generic, unprefixed name */
+       PERSP_GRADIENT_ENA_bit                            = 1 << 28,
+       LINEAR_GRADIENT_ENA_bit                           = 1 << 29,
+       POSITION_SAMPLE_bit                               = 1 << 30,
+       BARYC_AT_SAMPLE_ENA_bit                           = 1 << 31, /* NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit int; the resulting value depends on compiler handling - confirm */
+    SPI_PS_IN_CONTROL_1                                   = 0x000286d0, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       GEN_INDEX_PIX_bit                                 = 1 << 0,
+       GEN_INDEX_PIX_ADDR_mask                           = 0x7f << 1, /* 7-bit field starting at bit 1 */
+       GEN_INDEX_PIX_ADDR_shift                          = 1,
+       FRONT_FACE_ENA_bit                                = 1 << 8,
+       FRONT_FACE_CHAN_mask                              = 0x03 << 9,
+       FRONT_FACE_CHAN_shift                             = 9,
+       FRONT_FACE_ALL_BITS_bit                           = 1 << 11,
+       FRONT_FACE_ADDR_mask                              = 0x1f << 12,
+       FRONT_FACE_ADDR_shift                             = 12,
+       FOG_ADDR_mask                                     = 0x7f << 17, /* 7-bit field starting at bit 17 */
+       FOG_ADDR_shift                                    = 17,
+       FIXED_PT_POSITION_ENA_bit                         = 1 << 24,
+       FIXED_PT_POSITION_ADDR_mask                       = 0x1f << 25, /* highest field listed: bits 25..29 */
+       FIXED_PT_POSITION_ADDR_shift                      = 25,
+    SPI_INTERP_CONTROL_0                                  = 0x000286d4, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       FLAT_SHADE_ENA_bit                                = 1 << 0,
+       PNT_SPRITE_ENA_bit                                = 1 << 1,
+       PNT_SPRITE_OVRD_X_mask                            = 0x07 << 2,
+       PNT_SPRITE_OVRD_X_shift                           = 2,
+           SPI_PNT_SPRITE_SEL_0                          = 0x00, /* selector names are defined once here; the OVRD_Y/Z/W lists below repeat them commented out, since an enumerator name cannot be declared twice */
+           SPI_PNT_SPRITE_SEL_1                          = 0x01,
+           SPI_PNT_SPRITE_SEL_S                          = 0x02,
+           SPI_PNT_SPRITE_SEL_T                          = 0x03,
+           SPI_PNT_SPRITE_SEL_NONE                       = 0x04,
+       PNT_SPRITE_OVRD_Y_mask                            = 0x07 << 5,
+       PNT_SPRITE_OVRD_Y_shift                           = 5,
+/*         SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/*         SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/*         SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/*         SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/*         SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+       PNT_SPRITE_OVRD_Z_mask                            = 0x07 << 8,
+       PNT_SPRITE_OVRD_Z_shift                           = 8,
+/*         SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/*         SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/*         SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/*         SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/*         SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+       PNT_SPRITE_OVRD_W_mask                            = 0x07 << 11,
+       PNT_SPRITE_OVRD_W_shift                           = 11,
+/*         SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/*         SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/*         SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/*         SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/*         SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+       PNT_SPRITE_TOP_1_bit                              = 1 << 14,
+    SPI_INPUT_Z                                           = 0x000286d8, /* NOTE(review): presumably the register offset -- confirm; a single flag is listed for it */
+       PROVIDE_Z_TO_SPI_bit                              = 1 << 0,
+    SPI_FOG_CNTL                                          = 0x000286dc, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       PASS_FOG_THROUGH_PS_bit                           = 1 << 0,
+       PIXEL_FOG_FUNC_mask                               = 0x03 << 1,
+       PIXEL_FOG_FUNC_shift                              = 1,
+           SPI_FOG_NONE                                  = 0x00, /* the four values 0x00..0x03 listed here span the whole 2-bit PIXEL_FOG_FUNC range */
+           SPI_FOG_EXP                                   = 0x01,
+           SPI_FOG_EXP2                                  = 0x02,
+           SPI_FOG_LINEAR                                = 0x03,
+       PIXEL_FOG_SRC_SEL_bit                             = 1 << 3,
+       VS_FOG_CLAMP_DISABLE_bit                          = 1 << 4,
+    SPI_FOG_FUNC_SCALE                                    = 0x000286e0, /* no field constants are listed for this entry */
+    SPI_FOG_FUNC_BIAS                                     = 0x000286e4, /* no field constants are listed for this entry */
+    CB_BLEND0_CONTROL                                     = 0x00028780, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       CB_BLEND0_CONTROL_num                             = 8, /* NOTE(review): presumably the number of consecutive CB_BLENDn_CONTROL registers -- confirm */
+       COLOR_SRCBLEND_mask                               = 0x1f << 0, /* the field names in this group are the live definitions; CB_BLEND_CONTROL further down repeats them commented out */
+       COLOR_SRCBLEND_shift                              = 0,
+       COLOR_COMB_FCN_mask                               = 0x07 << 5,
+       COLOR_COMB_FCN_shift                              = 5,
+       COLOR_DESTBLEND_mask                              = 0x1f << 8,
+       COLOR_DESTBLEND_shift                             = 8,
+       OPACITY_WEIGHT_bit                                = 1 << 13,
+       ALPHA_SRCBLEND_mask                               = 0x1f << 16,
+       ALPHA_SRCBLEND_shift                              = 16,
+       ALPHA_COMB_FCN_mask                               = 0x07 << 21,
+       ALPHA_COMB_FCN_shift                              = 21,
+       ALPHA_DESTBLEND_mask                              = 0x1f << 24,
+       ALPHA_DESTBLEND_shift                             = 24,
+       SEPARATE_ALPHA_BLEND_bit                          = 1 << 29,
+    VGT_DMA_BASE_HI                                       = 0x000287e4, /* NOTE(review): presumably the register offset -- confirm */
+       VGT_DMA_BASE_HI__BASE_ADDR_mask                   = 0xff << 0, /* field name carries the owning constant's name as a prefix; NOTE(review): presumably to avoid clashing with another BASE_ADDR enumerator -- confirm */
+       VGT_DMA_BASE_HI__BASE_ADDR_shift                  = 0,
+    VGT_DMA_BASE                                          = 0x000287e8, /* no field constants are listed for this entry */
+    VGT_DRAW_INITIATOR                                    = 0x000287f0, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       SOURCE_SELECT_mask                                = 0x03 << 0,
+       SOURCE_SELECT_shift                               = 0,
+           DI_SRC_SEL_DMA                                = 0x00, /* the four values 0x00..0x03 listed here span the whole 2-bit SOURCE_SELECT range */
+           DI_SRC_SEL_IMMEDIATE                          = 0x01,
+           DI_SRC_SEL_AUTO_INDEX                         = 0x02,
+           DI_SRC_SEL_RESERVED                           = 0x03,
+       MAJOR_MODE_mask                                   = 0x03 << 2,
+       MAJOR_MODE_shift                                  = 2,
+           DI_MAJOR_MODE_0                               = 0x00, /* only two of the four possible 2-bit MAJOR_MODE values are named */
+           DI_MAJOR_MODE_1                               = 0x01,
+       SPRITE_EN_bit                                     = 1 << 4,
+       NOT_EOP_bit                                       = 1 << 5,
+       USE_OPAQUE_bit                                    = 1 << 6,
+    VGT_IMMED_DATA                                        = 0x000287f4, /* no field constants are listed for this entry */
+    VGT_EVENT_ADDRESS_REG                                 = 0x000287f8, /* NOTE(review): presumably the register offset -- confirm */
+       ADDRESS_LOW_mask                                  = 0xfffffff << 0, /* seven hex digits: a 28-bit mask (bits 0..27) */
+       ADDRESS_LOW_shift                                 = 0,
+    DB_DEPTH_CONTROL                                      = 0x00028800, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       STENCIL_ENABLE_bit                                = 1 << 0,
+       Z_ENABLE_bit                                      = 1 << 1,
+       Z_WRITE_ENABLE_bit                                = 1 << 2,
+       ZFUNC_mask                                        = 0x07 << 4,
+       ZFUNC_shift                                       = 4,
+           FRAG_NEVER                                    = 0x00, /* eight values 0x00..0x07 span the whole 3-bit ZFUNC range */
+           FRAG_LESS                                     = 0x01,
+           FRAG_EQUAL                                    = 0x02,
+           FRAG_LEQUAL                                   = 0x03,
+           FRAG_GREATER                                  = 0x04,
+           FRAG_NOTEQUAL                                 = 0x05,
+           FRAG_GEQUAL                                   = 0x06,
+           FRAG_ALWAYS                                   = 0x07,
+       BACKFACE_ENABLE_bit                               = 1 << 7,
+       STENCILFUNC_mask                                  = 0x07 << 8,
+       STENCILFUNC_shift                                 = 8, /* NOTE(review): the REF_* values below are commented out; presumably they are already defined earlier in this enum -- confirm */
+/*         REF_NEVER                                     = 0x00, */
+/*         REF_LESS                                      = 0x01, */
+/*         REF_EQUAL                                     = 0x02, */
+/*         REF_LEQUAL                                    = 0x03, */
+/*         REF_GREATER                                   = 0x04, */
+/*         REF_NOTEQUAL                                  = 0x05, */
+/*         REF_GEQUAL                                    = 0x06, */
+/*         REF_ALWAYS                                    = 0x07, */
+       STENCILFAIL_mask                                  = 0x07 << 11,
+       STENCILFAIL_shift                                 = 11,
+           STENCIL_KEEP                                  = 0x00, /* STENCIL_* names are defined once here; every later stencil-op field in this group repeats them commented out */
+           STENCIL_ZERO                                  = 0x01,
+           STENCIL_REPLACE                               = 0x02,
+           STENCIL_INCR_CLAMP                            = 0x03,
+           STENCIL_DECR_CLAMP                            = 0x04,
+           STENCIL_INVERT                                = 0x05,
+           STENCIL_INCR_WRAP                             = 0x06,
+           STENCIL_DECR_WRAP                             = 0x07,
+       STENCILZPASS_mask                                 = 0x07 << 14,
+       STENCILZPASS_shift                                = 14,
+/*         STENCIL_KEEP                                  = 0x00, */
+/*         STENCIL_ZERO                                  = 0x01, */
+/*         STENCIL_REPLACE                               = 0x02, */
+/*         STENCIL_INCR_CLAMP                            = 0x03, */
+/*         STENCIL_DECR_CLAMP                            = 0x04, */
+/*         STENCIL_INVERT                                = 0x05, */
+/*         STENCIL_INCR_WRAP                             = 0x06, */
+/*         STENCIL_DECR_WRAP                             = 0x07, */
+       STENCILZFAIL_mask                                 = 0x07 << 17,
+       STENCILZFAIL_shift                                = 17,
+/*         STENCIL_KEEP                                  = 0x00, */
+/*         STENCIL_ZERO                                  = 0x01, */
+/*         STENCIL_REPLACE                               = 0x02, */
+/*         STENCIL_INCR_CLAMP                            = 0x03, */
+/*         STENCIL_DECR_CLAMP                            = 0x04, */
+/*         STENCIL_INVERT                                = 0x05, */
+/*         STENCIL_INCR_WRAP                             = 0x06, */
+/*         STENCIL_DECR_WRAP                             = 0x07, */
+       STENCILFUNC_BF_mask                               = 0x07 << 20, /* the _BF fields repeat the four stencil fields above at bits 20..31 */
+       STENCILFUNC_BF_shift                              = 20,
+/*         REF_NEVER                                     = 0x00, */
+/*         REF_LESS                                      = 0x01, */
+/*         REF_EQUAL                                     = 0x02, */
+/*         REF_LEQUAL                                    = 0x03, */
+/*         REF_GREATER                                   = 0x04, */
+/*         REF_NOTEQUAL                                  = 0x05, */
+/*         REF_GEQUAL                                    = 0x06, */
+/*         REF_ALWAYS                                    = 0x07, */
+       STENCILFAIL_BF_mask                               = 0x07 << 23,
+       STENCILFAIL_BF_shift                              = 23,
+/*         STENCIL_KEEP                                  = 0x00, */
+/*         STENCIL_ZERO                                  = 0x01, */
+/*         STENCIL_REPLACE                               = 0x02, */
+/*         STENCIL_INCR_CLAMP                            = 0x03, */
+/*         STENCIL_DECR_CLAMP                            = 0x04, */
+/*         STENCIL_INVERT                                = 0x05, */
+/*         STENCIL_INCR_WRAP                             = 0x06, */
+/*         STENCIL_DECR_WRAP                             = 0x07, */
+       STENCILZPASS_BF_mask                              = 0x07 << 26,
+       STENCILZPASS_BF_shift                             = 26,
+/*         STENCIL_KEEP                                  = 0x00, */
+/*         STENCIL_ZERO                                  = 0x01, */
+/*         STENCIL_REPLACE                               = 0x02, */
+/*         STENCIL_INCR_CLAMP                            = 0x03, */
+/*         STENCIL_DECR_CLAMP                            = 0x04, */
+/*         STENCIL_INVERT                                = 0x05, */
+/*         STENCIL_INCR_WRAP                             = 0x06, */
+/*         STENCIL_DECR_WRAP                             = 0x07, */
+       STENCILZFAIL_BF_mask                              = 0x07 << 29, /* NOTE(review): 0x07 << 29 is 0xE0000000, which does not fit a 32-bit signed int; the shift result is undefined in ISO C -- confirm the toolchain accepts it */
+       STENCILZFAIL_BF_shift                             = 29,
+/*         STENCIL_KEEP                                  = 0x00, */
+/*         STENCIL_ZERO                                  = 0x01, */
+/*         STENCIL_REPLACE                               = 0x02, */
+/*         STENCIL_INCR_CLAMP                            = 0x03, */
+/*         STENCIL_DECR_CLAMP                            = 0x04, */
+/*         STENCIL_INVERT                                = 0x05, */
+/*         STENCIL_INCR_WRAP                             = 0x06, */
+/*         STENCIL_DECR_WRAP                             = 0x07, */
+    CB_BLEND_CONTROL                                      = 0x00028804, /* every field mask/shift/bit in this group is commented out: the same names with the same values are already defined under CB_BLEND0_CONTROL; only the BLEND_* and COMB_* values are live here */
+/*     COLOR_SRCBLEND_mask                               = 0x1f << 0, */
+/*     COLOR_SRCBLEND_shift                              = 0, */
+           BLEND_ZERO                                    = 0x00, /* BLEND_* factors 0x00..0x14, defined once here and repeated commented out for the other three blend-factor fields */
+           BLEND_ONE                                     = 0x01,
+           BLEND_SRC_COLOR                               = 0x02,
+           BLEND_ONE_MINUS_SRC_COLOR                     = 0x03,
+           BLEND_SRC_ALPHA                               = 0x04,
+           BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05,
+           BLEND_DST_ALPHA                               = 0x06,
+           BLEND_ONE_MINUS_DST_ALPHA                     = 0x07,
+           BLEND_DST_COLOR                               = 0x08,
+           BLEND_ONE_MINUS_DST_COLOR                     = 0x09,
+           BLEND_SRC_ALPHA_SATURATE                      = 0x0a,
+           BLEND_BOTH_SRC_ALPHA                          = 0x0b,
+           BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c,
+           BLEND_CONSTANT_COLOR                          = 0x0d,
+           BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e,
+           BLEND_SRC1_COLOR                              = 0x0f,
+           BLEND_INV_SRC1_COLOR                          = 0x10,
+           BLEND_SRC1_ALPHA                              = 0x11,
+           BLEND_INV_SRC1_ALPHA                          = 0x12,
+           BLEND_CONSTANT_ALPHA                          = 0x13,
+           BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14,
+/*     COLOR_COMB_FCN_mask                               = 0x07 << 5, */
+/*     COLOR_COMB_FCN_shift                              = 5, */
+           COMB_DST_PLUS_SRC                             = 0x00, /* COMB_* functions 0x00..0x04, defined once here and repeated commented out for ALPHA_COMB_FCN */
+           COMB_SRC_MINUS_DST                            = 0x01,
+           COMB_MIN_DST_SRC                              = 0x02,
+           COMB_MAX_DST_SRC                              = 0x03,
+           COMB_DST_MINUS_SRC                            = 0x04,
+/*     COLOR_DESTBLEND_mask                              = 0x1f << 8, */
+/*     COLOR_DESTBLEND_shift                             = 8, */
+/*         BLEND_ZERO                                    = 0x00, */
+/*         BLEND_ONE                                     = 0x01, */
+/*         BLEND_SRC_COLOR                               = 0x02, */
+/*         BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/*         BLEND_SRC_ALPHA                               = 0x04, */
+/*         BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/*         BLEND_DST_ALPHA                               = 0x06, */
+/*         BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/*         BLEND_DST_COLOR                               = 0x08, */
+/*         BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/*         BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/*         BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/*         BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/*         BLEND_CONSTANT_COLOR                          = 0x0d, */
+/*         BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/*         BLEND_SRC1_COLOR                              = 0x0f, */
+/*         BLEND_INV_SRC1_COLOR                          = 0x10, */
+/*         BLEND_SRC1_ALPHA                              = 0x11, */
+/*         BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/*         BLEND_CONSTANT_ALPHA                          = 0x13, */
+/*         BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+/*     OPACITY_WEIGHT_bit                                = 1 << 13, */
+/*     ALPHA_SRCBLEND_mask                               = 0x1f << 16, */
+/*     ALPHA_SRCBLEND_shift                              = 16, */
+/*         BLEND_ZERO                                    = 0x00, */
+/*         BLEND_ONE                                     = 0x01, */
+/*         BLEND_SRC_COLOR                               = 0x02, */
+/*         BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/*         BLEND_SRC_ALPHA                               = 0x04, */
+/*         BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/*         BLEND_DST_ALPHA                               = 0x06, */
+/*         BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/*         BLEND_DST_COLOR                               = 0x08, */
+/*         BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/*         BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/*         BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/*         BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/*         BLEND_CONSTANT_COLOR                          = 0x0d, */
+/*         BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/*         BLEND_SRC1_COLOR                              = 0x0f, */
+/*         BLEND_INV_SRC1_COLOR                          = 0x10, */
+/*         BLEND_SRC1_ALPHA                              = 0x11, */
+/*         BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/*         BLEND_CONSTANT_ALPHA                          = 0x13, */
+/*         BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+/*     ALPHA_COMB_FCN_mask                               = 0x07 << 21, */
+/*     ALPHA_COMB_FCN_shift                              = 21, */
+/*         COMB_DST_PLUS_SRC                             = 0x00, */
+/*         COMB_SRC_MINUS_DST                            = 0x01, */
+/*         COMB_MIN_DST_SRC                              = 0x02, */
+/*         COMB_MAX_DST_SRC                              = 0x03, */
+/*         COMB_DST_MINUS_SRC                            = 0x04, */
+/*     ALPHA_DESTBLEND_mask                              = 0x1f << 24, */
+/*     ALPHA_DESTBLEND_shift                             = 24, */
+/*         BLEND_ZERO                                    = 0x00, */
+/*         BLEND_ONE                                     = 0x01, */
+/*         BLEND_SRC_COLOR                               = 0x02, */
+/*         BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/*         BLEND_SRC_ALPHA                               = 0x04, */
+/*         BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/*         BLEND_DST_ALPHA                               = 0x06, */
+/*         BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/*         BLEND_DST_COLOR                               = 0x08, */
+/*         BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/*         BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/*         BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/*         BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/*         BLEND_CONSTANT_COLOR                          = 0x0d, */
+/*         BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/*         BLEND_SRC1_COLOR                              = 0x0f, */
+/*         BLEND_INV_SRC1_COLOR                          = 0x10, */
+/*         BLEND_SRC1_ALPHA                              = 0x11, */
+/*         BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/*         BLEND_CONSTANT_ALPHA                          = 0x13, */
+/*         BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+/*     SEPARATE_ALPHA_BLEND_bit                          = 1 << 29, */
+    CB_COLOR_CONTROL                                      = 0x00028808, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       FOG_ENABLE_bit                                    = 1 << 0,
+       MULTIWRITE_ENABLE_bit                             = 1 << 1,
+       DITHER_ENABLE_bit                                 = 1 << 2,
+       DEGAMMA_ENABLE_bit                                = 1 << 3,
+       SPECIAL_OP_mask                                   = 0x07 << 4,
+       SPECIAL_OP_shift                                  = 4,
+           SPECIAL_NORMAL                                = 0x00, /* eight values 0x00..0x07 span the whole 3-bit SPECIAL_OP range */
+           SPECIAL_DISABLE                               = 0x01,
+           SPECIAL_FAST_CLEAR                            = 0x02,
+           SPECIAL_FORCE_CLEAR                           = 0x03,
+           SPECIAL_EXPAND_COLOR                          = 0x04,
+           SPECIAL_EXPAND_TEXTURE                        = 0x05,
+           SPECIAL_EXPAND_SAMPLES                        = 0x06,
+           SPECIAL_RESOLVE_BOX                           = 0x07,
+       PER_MRT_BLEND_bit                                 = 1 << 7,
+       TARGET_BLEND_ENABLE_mask                          = 0xff << 8, /* 8-bit field, bits 8..15 */
+       TARGET_BLEND_ENABLE_shift                         = 8,
+       ROP3_mask                                         = 0xff << 16, /* 8-bit field, bits 16..23 */
+       ROP3_shift                                        = 16,
+    DB_SHADER_CONTROL                                     = 0x0002880c, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       Z_EXPORT_ENABLE_bit                               = 1 << 0,
+       STENCIL_REF_EXPORT_ENABLE_bit                     = 1 << 1,
+       Z_ORDER_mask                                      = 0x03 << 4,
+       Z_ORDER_shift                                     = 4,
+           LATE_Z                                        = 0x00, /* the four values 0x00..0x03 listed here span the whole 2-bit Z_ORDER range */
+           EARLY_Z_THEN_LATE_Z                           = 0x01,
+           RE_Z                                          = 0x02,
+           EARLY_Z_THEN_RE_Z                             = 0x03,
+       KILL_ENABLE_bit                                   = 1 << 6,
+       COVERAGE_TO_MASK_ENABLE_bit                       = 1 << 7,
+       MASK_EXPORT_ENABLE_bit                            = 1 << 8,
+       DUAL_EXPORT_ENABLE_bit                            = 1 << 9,
+       EXEC_ON_HIER_FAIL_bit                             = 1 << 10,
+       EXEC_ON_NOOP_bit                                  = 1 << 11,
+    PA_CL_CLIP_CNTL                                       = 0x00028810, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       UCP_ENA_0_bit                                     = 1 << 0, /* six UCP_ENA_n flags occupy bits 0..5, one bit per index */
+       UCP_ENA_1_bit                                     = 1 << 1,
+       UCP_ENA_2_bit                                     = 1 << 2,
+       UCP_ENA_3_bit                                     = 1 << 3,
+       UCP_ENA_4_bit                                     = 1 << 4,
+       UCP_ENA_5_bit                                     = 1 << 5,
+       PS_UCP_Y_SCALE_NEG_bit                            = 1 << 13,
+       PS_UCP_MODE_mask                                  = 0x03 << 14, /* 2-bit field; no named values are listed for it */
+       PS_UCP_MODE_shift                                 = 14,
+       CLIP_DISABLE_bit                                  = 1 << 16,
+       UCP_CULL_ONLY_ENA_bit                             = 1 << 17,
+       BOUNDARY_EDGE_FLAG_ENA_bit                        = 1 << 18,
+       DX_CLIP_SPACE_DEF_bit                             = 1 << 19,
+       DIS_CLIP_ERR_DETECT_bit                           = 1 << 20,
+       VTX_KILL_OR_bit                                   = 1 << 21,
+       DX_LINEAR_ATTR_CLIP_ENA_bit                       = 1 << 24,
+       VTE_VPORT_PROVOKE_DISABLE_bit                     = 1 << 25,
+       ZCLIP_NEAR_DISABLE_bit                            = 1 << 26,
+       ZCLIP_FAR_DISABLE_bit                             = 1 << 27,
+    PA_SU_SC_MODE_CNTL                                    = 0x00028814, /* NOTE(review): presumably the register offset; indented entries below are its field constants -- confirm */
+       CULL_FRONT_bit                                    = 1 << 0,
+       CULL_BACK_bit                                     = 1 << 1,
+       FACE_bit                                          = 1 << 2,
+       POLY_MODE_mask                                    = 0x03 << 3,
+       POLY_MODE_shift                                   = 3,
+           X_DISABLE_POLY_MODE                           = 0x00, /* only two of the four possible 2-bit POLY_MODE values are named */
+           X_DUAL_MODE                                   = 0x01,
+       POLYMODE_FRONT_PTYPE_mask                         = 0x07 << 5,
+       POLYMODE_FRONT_PTYPE_shift                        = 5,
+           X_DRAW_POINTS                                 = 0x00, /* X_DRAW_* names are defined once here; POLYMODE_BACK_PTYPE repeats them commented out */
+           X_DRAW_LINES                                  = 0x01,
+           X_DRAW_TRIANGLES                              = 0x02,
+       POLYMODE_BACK_PTYPE_mask                          = 0x07 << 8,
+       POLYMODE_BACK_PTYPE_shift                         = 8,
+/*         X_DRAW_POINTS                                 = 0x00, */
+/*         X_DRAW_LINES                                  = 0x01, */
+/*         X_DRAW_TRIANGLES                              = 0x02, */
+       POLY_OFFSET_FRONT_ENABLE_bit                      = 1 << 11,
+       POLY_OFFSET_BACK_ENABLE_bit                       = 1 << 12,
+       POLY_OFFSET_PARA_ENABLE_bit                       = 1 << 13,
+       VTX_WINDOW_OFFSET_ENABLE_bit                      = 1 << 16,
+       PROVOKING_VTX_LAST_bit                            = 1 << 19,
+       PERSP_CORR_DIS_bit                                = 1 << 20,
+       MULTI_PRIM_IB_ENA_bit                             = 1 << 21,
+    PA_CL_VTE_CNTL                                        = 0x00028818, /* NOTE(review): presumably the register offset; every entry below is a single-bit flag -- confirm */
+       VPORT_X_SCALE_ENA_bit                             = 1 << 0, /* scale/offset enable flags for X, Y and Z occupy bits 0..5 in pairs */
+       VPORT_X_OFFSET_ENA_bit                            = 1 << 1,
+       VPORT_Y_SCALE_ENA_bit                             = 1 << 2,
+       VPORT_Y_OFFSET_ENA_bit                            = 1 << 3,
+       VPORT_Z_SCALE_ENA_bit                             = 1 << 4,
+       VPORT_Z_OFFSET_ENA_bit                            = 1 << 5,
+       VTX_XY_FMT_bit                                    = 1 << 8,
+       VTX_Z_FMT_bit                                     = 1 << 9,
+       VTX_W0_FMT_bit                                    = 1 << 10,
+       PERFCOUNTER_REF_bit                               = 1 << 11,
+    PA_CL_VS_OUT_CNTL                                     = 0x0002881c, /* NOTE(review): presumably the register offset; every entry below is a single-bit flag -- confirm */
+       CLIP_DIST_ENA_0_bit                               = 1 << 0, /* eight CLIP_DIST_ENA_n flags occupy bits 0..7 */
+       CLIP_DIST_ENA_1_bit                               = 1 << 1,
+       CLIP_DIST_ENA_2_bit                               = 1 << 2,
+       CLIP_DIST_ENA_3_bit                               = 1 << 3,
+       CLIP_DIST_ENA_4_bit                               = 1 << 4,
+       CLIP_DIST_ENA_5_bit                               = 1 << 5,
+       CLIP_DIST_ENA_6_bit                               = 1 << 6,
+       CLIP_DIST_ENA_7_bit                               = 1 << 7,
+       CULL_DIST_ENA_0_bit                               = 1 << 8, /* eight CULL_DIST_ENA_n flags occupy bits 8..15 */
+       CULL_DIST_ENA_1_bit                               = 1 << 9,
+       CULL_DIST_ENA_2_bit                               = 1 << 10,
+       CULL_DIST_ENA_3_bit                               = 1 << 11,
+       CULL_DIST_ENA_4_bit                               = 1 << 12,
+       CULL_DIST_ENA_5_bit                               = 1 << 13,
+       CULL_DIST_ENA_6_bit                               = 1 << 14,
+       CULL_DIST_ENA_7_bit                               = 1 << 15,
+       USE_VTX_POINT_SIZE_bit                            = 1 << 16,
+       USE_VTX_EDGE_FLAG_bit                             = 1 << 17,
+       USE_VTX_RENDER_TARGET_INDX_bit                    = 1 << 18,
+       USE_VTX_VIEWPORT_INDX_bit                         = 1 << 19,
+       USE_VTX_KILL_FLAG_bit                             = 1 << 20,
+       VS_OUT_MISC_VEC_ENA_bit                           = 1 << 21,
+       VS_OUT_CCDIST0_VEC_ENA_bit                        = 1 << 22,
+       VS_OUT_CCDIST1_VEC_ENA_bit                        = 1 << 23,
+    PA_CL_NANINF_CNTL                                     = 0x00028820, /* NOTE(review): presumably the register offset; every entry below is a single-bit flag -- confirm */
+       VTE_XY_INF_DISCARD_bit                            = 1 << 0, /* VTE_* flags occupy bits 0..7 */
+       VTE_Z_INF_DISCARD_bit                             = 1 << 1,
+       VTE_W_INF_DISCARD_bit                             = 1 << 2,
+       VTE_0XNANINF_IS_0_bit                             = 1 << 3,
+       VTE_XY_NAN_RETAIN_bit                             = 1 << 4,
+       VTE_Z_NAN_RETAIN_bit                              = 1 << 5,
+       VTE_W_NAN_RETAIN_bit                              = 1 << 6,
+       VTE_W_RECIP_NAN_IS_0_bit                          = 1 << 7,
+       VS_XY_NAN_TO_INF_bit                              = 1 << 8, /* VS_* flags occupy bits 8..14 */
+       VS_XY_INF_RETAIN_bit                              = 1 << 9,
+       VS_Z_NAN_TO_INF_bit                               = 1 << 10,
+       VS_Z_INF_RETAIN_bit                               = 1 << 11,
+       VS_W_NAN_TO_INF_bit                               = 1 << 12,
+       VS_W_INF_RETAIN_bit                               = 1 << 13,
+       VS_CLIP_DIST_INF_DISCARD_bit                      = 1 << 14,
+       VTE_NO_OUTPUT_NEG_0_bit                           = 1 << 20,
+    SQ_PGM_START_PS                                       = 0x00028840, /* no field constants are listed for this entry */
+    SQ_PGM_RESOURCES_PS                                   = 0x00028850, /* NOTE(review): presumably the register offset -- confirm; the unprefixed field names below are the live definitions that the VS/GS/ES groups repeat commented out */
+       NUM_GPRS_mask                                     = 0xff << 0,
+       NUM_GPRS_shift                                    = 0,
+       STACK_SIZE_mask                                   = 0xff << 8,
+       STACK_SIZE_shift                                  = 8,
+       SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit               = 1 << 21, /* prefixed with the owning constant's name, unlike its neighbours; NOTE(review): presumably to avoid clashing with another DX10_CLAMP enumerator -- confirm */
+       FETCH_CACHE_LINES_mask                            = 0x07 << 24,
+       FETCH_CACHE_LINES_shift                           = 24,
+       UNCACHED_FIRST_INST_bit                           = 1 << 28,
+       CLAMP_CONSTS_bit                                  = 1 << 31, /* NOTE(review): with a 32-bit int this shifts into the sign bit, which ISO C leaves undefined for signed operands -- confirm the toolchain yields 0x80000000 */
+    SQ_PGM_EXPORTS_PS                                     = 0x00028854,
+       EXPORT_MODE_mask                                  = 0x1f << 0, /* 5-bit field at bit 0; no named values are listed for it */
+       EXPORT_MODE_shift                                 = 0,
+    SQ_PGM_START_VS                                       = 0x00028858, /* no field constants are listed for this entry */
+    SQ_PGM_RESOURCES_VS                                   = 0x00028868, /* shared field names are commented out because SQ_PGM_RESOURCES_PS already defines them with the same values */
+/*     NUM_GPRS_mask                                     = 0xff << 0, */
+/*     NUM_GPRS_shift                                    = 0, */
+/*     STACK_SIZE_mask                                   = 0xff << 8, */
+/*     STACK_SIZE_shift                                  = 8, */
+       SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit               = 1 << 21, /* only live field in this group; same bit position as the PS variant */
+/*     FETCH_CACHE_LINES_mask                            = 0x07 << 24, */
+/*     FETCH_CACHE_LINES_shift                           = 24, */
+/*     UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_START_GS                                       = 0x0002886c,
+    SQ_PGM_RESOURCES_GS                                   = 0x0002887c,
+/*     NUM_GPRS_mask                                     = 0xff << 0, */
+/*     NUM_GPRS_shift                                    = 0, */
+/*     STACK_SIZE_mask                                   = 0xff << 8, */
+/*     STACK_SIZE_shift                                  = 8, */
+       SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit               = 1 << 21,
+/*     FETCH_CACHE_LINES_mask                            = 0x07 << 24, */
+/*     FETCH_CACHE_LINES_shift                           = 24, */
+/*     UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_START_ES                                       = 0x00028880,
+    SQ_PGM_RESOURCES_ES                                   = 0x00028890,
+/*     NUM_GPRS_mask                                     = 0xff << 0, */
+/*     NUM_GPRS_shift                                    = 0, */
+/*     STACK_SIZE_mask                                   = 0xff << 8, */
+/*     STACK_SIZE_shift                                  = 8, */
+       SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit               = 1 << 21,
+/*     FETCH_CACHE_LINES_mask                            = 0x07 << 24, */
+/*     FETCH_CACHE_LINES_shift                           = 24, */
+/*     UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_START_FS                                       = 0x00028894,
+    SQ_PGM_RESOURCES_FS                                   = 0x000288a4,
+/*     NUM_GPRS_mask                                     = 0xff << 0, */
+/*     NUM_GPRS_shift                                    = 0, */
+/*     STACK_SIZE_mask                                   = 0xff << 8, */
+/*     STACK_SIZE_shift                                  = 8, */
+       SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit               = 1 << 21,
+    SQ_ESGS_RING_ITEMSIZE                                 = 0x000288a8,
+       ITEMSIZE_mask                                     = 0x7fff << 0,
+       ITEMSIZE_shift                                    = 0,
+    SQ_GSVS_RING_ITEMSIZE                                 = 0x000288ac,
+/*     ITEMSIZE_mask                                     = 0x7fff << 0, */
+/*     ITEMSIZE_shift                                    = 0, */
+    SQ_ESTMP_RING_ITEMSIZE                                = 0x000288b0,
+/*     ITEMSIZE_mask                                     = 0x7fff << 0, */
+/*     ITEMSIZE_shift                                    = 0, */
+    SQ_GSTMP_RING_ITEMSIZE                                = 0x000288b4,
+/*     ITEMSIZE_mask                                     = 0x7fff << 0, */
+/*     ITEMSIZE_shift                                    = 0, */
+    SQ_VSTMP_RING_ITEMSIZE                                = 0x000288b8,
+/*     ITEMSIZE_mask                                     = 0x7fff << 0, */
+/*     ITEMSIZE_shift                                    = 0, */
+    SQ_PSTMP_RING_ITEMSIZE                                = 0x000288bc,
+/*     ITEMSIZE_mask                                     = 0x7fff << 0, */
+/*     ITEMSIZE_shift                                    = 0, */
+    SQ_FBUF_RING_ITEMSIZE                                 = 0x000288c0,
+/*     ITEMSIZE_mask                                     = 0x7fff << 0, */
+/*     ITEMSIZE_shift                                    = 0, */
+    SQ_REDUC_RING_ITEMSIZE                                = 0x000288c4,
+/*     ITEMSIZE_mask                                     = 0x7fff << 0, */
+/*     ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE                                   = 0x000288c8,
+/*     ITEMSIZE_mask                                     = 0x7fff << 0, */
+/*     ITEMSIZE_shift                                    = 0, */
+    SQ_PGM_CF_OFFSET_PS                                   = 0x000288cc,
+       PGM_CF_OFFSET_mask                                = 0xfffff << 0,
+       PGM_CF_OFFSET_shift                               = 0,
+    SQ_PGM_CF_OFFSET_VS                                   = 0x000288d0,
+/*     PGM_CF_OFFSET_mask                                = 0xfffff << 0, */
+/*     PGM_CF_OFFSET_shift                               = 0, */
+    SQ_PGM_CF_OFFSET_GS                                   = 0x000288d4,
+/*     PGM_CF_OFFSET_mask                                = 0xfffff << 0, */
+/*     PGM_CF_OFFSET_shift                               = 0, */
+    SQ_PGM_CF_OFFSET_ES                                   = 0x000288d8,
+/*     PGM_CF_OFFSET_mask                                = 0xfffff << 0, */
+/*     PGM_CF_OFFSET_shift                               = 0, */
+    SQ_PGM_CF_OFFSET_FS                                   = 0x000288dc,
+/*     PGM_CF_OFFSET_mask                                = 0xfffff << 0, */
+/*     PGM_CF_OFFSET_shift                               = 0, */
+    SQ_VTX_SEMANTIC_CLEAR                                 = 0x000288e0,
+    SQ_ALU_CONST_CACHE_PS_0                               = 0x00028940,
+       SQ_ALU_CONST_CACHE_PS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_VS_0                               = 0x00028980,
+       SQ_ALU_CONST_CACHE_VS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_GS_0                               = 0x000289c0,
+       SQ_ALU_CONST_CACHE_GS_0_num                       = 16,
+    PA_SU_POINT_SIZE                                      = 0x00028a00,
+       PA_SU_POINT_SIZE__HEIGHT_mask                     = 0xffff << 0,
+       PA_SU_POINT_SIZE__HEIGHT_shift                    = 0,
+       PA_SU_POINT_SIZE__WIDTH_mask                      = 0xffff << 16,
+       PA_SU_POINT_SIZE__WIDTH_shift                     = 16,
+    PA_SU_POINT_MINMAX                                    = 0x00028a04,
+       MIN_SIZE_mask                                     = 0xffff << 0,
+       MIN_SIZE_shift                                    = 0,
+       MAX_SIZE_mask                                     = 0xffff << 16,
+       MAX_SIZE_shift                                    = 16,
+    PA_SU_LINE_CNTL                                       = 0x00028a08,
+       PA_SU_LINE_CNTL__WIDTH_mask                       = 0xffff << 0,
+       PA_SU_LINE_CNTL__WIDTH_shift                      = 0,
+    PA_SC_LINE_STIPPLE                                    = 0x00028a0c,
+       LINE_PATTERN_mask                                 = 0xffff << 0,
+       LINE_PATTERN_shift                                = 0,
+       REPEAT_COUNT_mask                                 = 0xff << 16,
+       REPEAT_COUNT_shift                                = 16,
+       PATTERN_BIT_ORDER_bit                             = 1 << 28,
+       AUTO_RESET_CNTL_mask                              = 0x03 << 29,
+       AUTO_RESET_CNTL_shift                             = 29,
+    VGT_OUTPUT_PATH_CNTL                                  = 0x00028a10,
+       PATH_SELECT_mask                                  = 0x03 << 0,
+       PATH_SELECT_shift                                 = 0,
+           VGT_OUTPATH_VTX_REUSE                         = 0x00,
+           VGT_OUTPATH_TESS_EN                           = 0x01,
+           VGT_OUTPATH_PASSTHRU                          = 0x02,
+           VGT_OUTPATH_GS_BLOCK                          = 0x03,
+    VGT_HOS_CNTL                                          = 0x00028a14,
+       TESS_MODE_mask                                    = 0x03 << 0,
+       TESS_MODE_shift                                   = 0,
+    VGT_HOS_MAX_TESS_LEVEL                                = 0x00028a18,
+    VGT_HOS_MIN_TESS_LEVEL                                = 0x00028a1c,
+    VGT_HOS_REUSE_DEPTH                                   = 0x00028a20,
+       REUSE_DEPTH_mask                                  = 0xff << 0,
+       REUSE_DEPTH_shift                                 = 0,
+    VGT_GROUP_PRIM_TYPE                                   = 0x00028a24,
+       VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask               = 0x1f << 0,
+       VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift              = 0,
+           VGT_GRP_3D_POINT                              = 0x00,
+           VGT_GRP_3D_LINE                               = 0x01,
+           VGT_GRP_3D_TRI                                = 0x02,
+           VGT_GRP_3D_RECT                               = 0x03,
+           VGT_GRP_3D_QUAD                               = 0x04,
+           VGT_GRP_2D_COPY_RECT_V0                       = 0x05,
+           VGT_GRP_2D_COPY_RECT_V1                       = 0x06,
+           VGT_GRP_2D_COPY_RECT_V2                       = 0x07,
+           VGT_GRP_2D_COPY_RECT_V3                       = 0x08,
+           VGT_GRP_2D_FILL_RECT                          = 0x09,
+           VGT_GRP_2D_LINE                               = 0x0a,
+           VGT_GRP_2D_TRI                                = 0x0b,
+           VGT_GRP_PRIM_INDEX_LINE                       = 0x0c,
+           VGT_GRP_PRIM_INDEX_TRI                        = 0x0d,
+           VGT_GRP_PRIM_INDEX_QUAD                       = 0x0e,
+           VGT_GRP_3D_LINE_ADJ                           = 0x0f,
+           VGT_GRP_3D_TRI_ADJ                            = 0x10,
+       RETAIN_ORDER_bit                                  = 1 << 14,
+       RETAIN_QUADS_bit                                  = 1 << 15,
+       PRIM_ORDER_mask                                   = 0x07 << 16,
+       PRIM_ORDER_shift                                  = 16,
+           VGT_GRP_LIST                                  = 0x00,
+           VGT_GRP_STRIP                                 = 0x01,
+           VGT_GRP_FAN                                   = 0x02,
+           VGT_GRP_LOOP                                  = 0x03,
+           VGT_GRP_POLYGON                               = 0x04,
+    VGT_GROUP_FIRST_DECR                                  = 0x00028a28,
+       FIRST_DECR_mask                                   = 0x0f << 0,
+       FIRST_DECR_shift                                  = 0,
+    VGT_GROUP_DECR                                        = 0x00028a2c,
+       DECR_mask                                         = 0x0f << 0,
+       DECR_shift                                        = 0,
+    VGT_GROUP_VECT_0_CNTL                                 = 0x00028a30,
+       COMP_X_EN_bit                                     = 1 << 0,
+       COMP_Y_EN_bit                                     = 1 << 1,
+       COMP_Z_EN_bit                                     = 1 << 2,
+       COMP_W_EN_bit                                     = 1 << 3,
+       VGT_GROUP_VECT_0_CNTL__STRIDE_mask                = 0xff << 8,
+       VGT_GROUP_VECT_0_CNTL__STRIDE_shift               = 8,
+       SHIFT_mask                                        = 0xff << 16,
+       SHIFT_shift                                       = 16,
+    VGT_GROUP_VECT_1_CNTL                                 = 0x00028a34,
+/*     COMP_X_EN_bit                                     = 1 << 0, */
+/*     COMP_Y_EN_bit                                     = 1 << 1, */
+/*     COMP_Z_EN_bit                                     = 1 << 2, */
+/*     COMP_W_EN_bit                                     = 1 << 3, */
+       VGT_GROUP_VECT_1_CNTL__STRIDE_mask                = 0xff << 8,
+       VGT_GROUP_VECT_1_CNTL__STRIDE_shift               = 8,
+/*     SHIFT_mask                                        = 0xff << 16, */
+/*     SHIFT_shift                                       = 16, */
+    VGT_GROUP_VECT_0_FMT_CNTL                             = 0x00028a38,
+       X_CONV_mask                                       = 0x0f << 0,
+       X_CONV_shift                                      = 0,
+           VGT_GRP_INDEX_16                              = 0x00,
+           VGT_GRP_INDEX_32                              = 0x01,
+           VGT_GRP_UINT_16                               = 0x02,
+           VGT_GRP_UINT_32                               = 0x03,
+           VGT_GRP_SINT_16                               = 0x04,
+           VGT_GRP_SINT_32                               = 0x05,
+           VGT_GRP_FLOAT_32                              = 0x06,
+           VGT_GRP_AUTO_PRIM                             = 0x07,
+           VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08,
+       X_OFFSET_mask                                     = 0x0f << 4,
+       X_OFFSET_shift                                    = 4,
+       Y_CONV_mask                                       = 0x0f << 8,
+       Y_CONV_shift                                      = 8,
+/*         VGT_GRP_INDEX_16                              = 0x00, */
+/*         VGT_GRP_INDEX_32                              = 0x01, */
+/*         VGT_GRP_UINT_16                               = 0x02, */
+/*         VGT_GRP_UINT_32                               = 0x03, */
+/*         VGT_GRP_SINT_16                               = 0x04, */
+/*         VGT_GRP_SINT_32                               = 0x05, */
+/*         VGT_GRP_FLOAT_32                              = 0x06, */
+/*         VGT_GRP_AUTO_PRIM                             = 0x07, */
+/*         VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+       Y_OFFSET_mask                                     = 0x0f << 12,
+       Y_OFFSET_shift                                    = 12,
+       Z_CONV_mask                                       = 0x0f << 16,
+       Z_CONV_shift                                      = 16,
+/*         VGT_GRP_INDEX_16                              = 0x00, */
+/*         VGT_GRP_INDEX_32                              = 0x01, */
+/*         VGT_GRP_UINT_16                               = 0x02, */
+/*         VGT_GRP_UINT_32                               = 0x03, */
+/*         VGT_GRP_SINT_16                               = 0x04, */
+/*         VGT_GRP_SINT_32                               = 0x05, */
+/*         VGT_GRP_FLOAT_32                              = 0x06, */
+/*         VGT_GRP_AUTO_PRIM                             = 0x07, */
+/*         VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+       Z_OFFSET_mask                                     = 0x0f << 20,
+       Z_OFFSET_shift                                    = 20,
+       W_CONV_mask                                       = 0x0f << 24,
+       W_CONV_shift                                      = 24,
+/*         VGT_GRP_INDEX_16                              = 0x00, */
+/*         VGT_GRP_INDEX_32                              = 0x01, */
+/*         VGT_GRP_UINT_16                               = 0x02, */
+/*         VGT_GRP_UINT_32                               = 0x03, */
+/*         VGT_GRP_SINT_16                               = 0x04, */
+/*         VGT_GRP_SINT_32                               = 0x05, */
+/*         VGT_GRP_FLOAT_32                              = 0x06, */
+/*         VGT_GRP_AUTO_PRIM                             = 0x07, */
+/*         VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+       W_OFFSET_mask                                     = 0x0f << 28,
+       W_OFFSET_shift                                    = 28,
+    VGT_GROUP_VECT_1_FMT_CNTL                             = 0x00028a3c,
+/*     X_CONV_mask                                       = 0x0f << 0, */
+/*     X_CONV_shift                                      = 0, */
+/*         VGT_GRP_INDEX_16                              = 0x00, */
+/*         VGT_GRP_INDEX_32                              = 0x01, */
+/*         VGT_GRP_UINT_16                               = 0x02, */
+/*         VGT_GRP_UINT_32                               = 0x03, */
+/*         VGT_GRP_SINT_16                               = 0x04, */
+/*         VGT_GRP_SINT_32                               = 0x05, */
+/*         VGT_GRP_FLOAT_32                              = 0x06, */
+/*         VGT_GRP_AUTO_PRIM                             = 0x07, */
+/*         VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/*     X_OFFSET_mask                                     = 0x0f << 4, */
+/*     X_OFFSET_shift                                    = 4, */
+/*     Y_CONV_mask                                       = 0x0f << 8, */
+/*     Y_CONV_shift                                      = 8, */
+/*         VGT_GRP_INDEX_16                              = 0x00, */
+/*         VGT_GRP_INDEX_32                              = 0x01, */
+/*         VGT_GRP_UINT_16                               = 0x02, */
+/*         VGT_GRP_UINT_32                               = 0x03, */
+/*         VGT_GRP_SINT_16                               = 0x04, */
+/*         VGT_GRP_SINT_32                               = 0x05, */
+/*         VGT_GRP_FLOAT_32                              = 0x06, */
+/*         VGT_GRP_AUTO_PRIM                             = 0x07, */
+/*         VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/*     Y_OFFSET_mask                                     = 0x0f << 12, */
+/*     Y_OFFSET_shift                                    = 12, */
+/*     Z_CONV_mask                                       = 0x0f << 16, */
+/*     Z_CONV_shift                                      = 16, */
+/*         VGT_GRP_INDEX_16                              = 0x00, */
+/*         VGT_GRP_INDEX_32                              = 0x01, */
+/*         VGT_GRP_UINT_16                               = 0x02, */
+/*         VGT_GRP_UINT_32                               = 0x03, */
+/*         VGT_GRP_SINT_16                               = 0x04, */
+/*         VGT_GRP_SINT_32                               = 0x05, */
+/*         VGT_GRP_FLOAT_32                              = 0x06, */
+/*         VGT_GRP_AUTO_PRIM                             = 0x07, */
+/*         VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/*     Z_OFFSET_mask                                     = 0x0f << 20, */
+/*     Z_OFFSET_shift                                    = 20, */
+/*     W_CONV_mask                                       = 0x0f << 24, */
+/*     W_CONV_shift                                      = 24, */
+/*         VGT_GRP_INDEX_16                              = 0x00, */
+/*         VGT_GRP_INDEX_32                              = 0x01, */
+/*         VGT_GRP_UINT_16                               = 0x02, */
+/*         VGT_GRP_UINT_32                               = 0x03, */
+/*         VGT_GRP_SINT_16                               = 0x04, */
+/*         VGT_GRP_SINT_32                               = 0x05, */
+/*         VGT_GRP_FLOAT_32                              = 0x06, */
+/*         VGT_GRP_AUTO_PRIM                             = 0x07, */
+/*         VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/*     W_OFFSET_mask                                     = 0x0f << 28, */
+/*     W_OFFSET_shift                                    = 28, */
+    VGT_GS_MODE                                           = 0x00028a40,
+       MODE_mask                                         = 0x03 << 0,
+       MODE_shift                                        = 0,
+           GS_OFF                                        = 0x00,
+           GS_SCENARIO_A                                 = 0x01,
+           GS_SCENARIO_B                                 = 0x02,
+           GS_SCENARIO_G                                 = 0x03,
+       ES_PASSTHRU_bit                                   = 1 << 2,
+       CUT_MODE_mask                                     = 0x03 << 3,
+       CUT_MODE_shift                                    = 3,
+           GS_CUT_1024                                   = 0x00,
+           GS_CUT_512                                    = 0x01,
+           GS_CUT_256                                    = 0x02,
+           GS_CUT_128                                    = 0x03,
+    PA_SC_MPASS_PS_CNTL                                   = 0x00028a48,
+       MPASS_PIX_VEC_PER_PASS_mask                       = 0xfffff << 0,
+       MPASS_PIX_VEC_PER_PASS_shift                      = 0,
+       MPASS_PS_ENA_bit                                  = 1 << 31,
+    PA_SC_MODE_CNTL                                       = 0x00028a4c,
+       MSAA_ENABLE_bit                                   = 1 << 0,
+       CLIPRECT_ENABLE_bit                               = 1 << 1,
+       LINE_STIPPLE_ENABLE_bit                           = 1 << 2,
+       MULTI_CHIP_PRIM_DISCARD_ENAB_bit                  = 1 << 3,
+       WALK_ORDER_ENABLE_bit                             = 1 << 4,
+       HALVE_DETAIL_SAMPLE_PERF_bit                      = 1 << 5,
+       WALK_SIZE_bit                                     = 1 << 6,
+       WALK_ALIGNMENT_bit                                = 1 << 7,
+       WALK_ALIGN8_PRIM_FITS_ST_bit                      = 1 << 8,
+       TILE_COVER_NO_SCISSOR_bit                         = 1 << 9,
+       KILL_PIX_POST_HI_Z_bit                            = 1 << 10,
+       KILL_PIX_POST_DETAIL_MASK_bit                     = 1 << 11,
+       MULTI_CHIP_SUPERTILE_ENABLE_bit                   = 1 << 12,
+       TILE_COVER_DISABLE_bit                            = 1 << 13,
+       FORCE_EOV_CNTDWN_ENABLE_bit                       = 1 << 14,
+       FORCE_EOV_TILE_ENABLE_bit                         = 1 << 15,
+       FORCE_EOV_REZ_ENABLE_bit                          = 1 << 16,
+       PS_ITER_SAMPLE_bit                                = 1 << 17,
+    VGT_ENHANCE                                           = 0x00028a50,
+       VGT_ENHANCE__MI_TIMESTAMP_RES_mask                = 0x03 << 0,
+       VGT_ENHANCE__MI_TIMESTAMP_RES_shift               = 0,
+           X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32   = 0x00,
+           X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16   = 0x01,
+           X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8    = 0x02,
+           X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4    = 0x03,
+       MISC_mask                                         = 0x3fffffff << 2,
+       MISC_shift                                        = 2,
+    VGT_GS_OUT_PRIM_TYPE                                  = 0x00028a6c,
+       OUTPRIM_TYPE_mask                                 = 0x3f << 0,
+       OUTPRIM_TYPE_shift                                = 0,
+           POINTLIST                                     = 0x00,
+           LINESTRIP                                     = 0x01,
+           TRISTRIP                                      = 0x02,
+    VGT_DMA_SIZE                                          = 0x00028a74,
+    VGT_DMA_INDEX_TYPE                                    = 0x00028a7c,
+/*     INDEX_TYPE_mask                                   = 0x03 << 0, */
+/*     INDEX_TYPE_shift                                  = 0, */
+           VGT_INDEX_16                                  = 0x00,
+           VGT_INDEX_32                                  = 0x01,
+       SWAP_MODE_mask                                    = 0x03 << 2,
+       SWAP_MODE_shift                                   = 2,
+           VGT_DMA_SWAP_NONE                             = 0x00,
+           VGT_DMA_SWAP_16_BIT                           = 0x01,
+           VGT_DMA_SWAP_32_BIT                           = 0x02,
+           VGT_DMA_SWAP_WORD                             = 0x03,
+    VGT_PRIMITIVEID_EN                                    = 0x00028a84,
+       PRIMITIVEID_EN_bit                                = 1 << 0,
+    VGT_DMA_NUM_INSTANCES                                 = 0x00028a88,
+    VGT_EVENT_INITIATOR                                   = 0x00028a90,
+       EVENT_TYPE_mask                                   = 0x3f << 0,
+       EVENT_TYPE_shift                                  = 0,
+           CACHE_FLUSH_TS                                = 0x04,
+           CONTEXT_DONE                                  = 0x05,
+           CACHE_FLUSH                                   = 0x06,
+           VIZQUERY_START                                = 0x07,
+           VIZQUERY_END                                  = 0x08,
+           SC_WAIT_WC                                    = 0x09,
+           MPASS_PS_CP_REFETCH                           = 0x0a,
+           MPASS_PS_RST_START                            = 0x0b,
+           MPASS_PS_INCR_START                           = 0x0c,
+           RST_PIX_CNT                                   = 0x0d,
+           RST_VTX_CNT                                   = 0x0e,
+           VS_PARTIAL_FLUSH                              = 0x0f,
+           PS_PARTIAL_FLUSH                              = 0x10,
+           CACHE_FLUSH_AND_INV_TS_EVENT                  = 0x14,
+           ZPASS_DONE                                    = 0x15,
+           CACHE_FLUSH_AND_INV_EVENT                     = 0x16,
+           PERFCOUNTER_START                             = 0x17,
+           PERFCOUNTER_STOP                              = 0x18,
+           PIPELINESTAT_START                            = 0x19,
+           PIPELINESTAT_STOP                             = 0x1a,
+           PERFCOUNTER_SAMPLE                            = 0x1b,
+           FLUSH_ES_OUTPUT                               = 0x1c,
+           FLUSH_GS_OUTPUT                               = 0x1d,
+           SAMPLE_PIPELINESTAT                           = 0x1e,
+           SO_VGTSTREAMOUT_FLUSH                         = 0x1f,
+           SAMPLE_STREAMOUTSTATS                         = 0x20,
+           RESET_VTX_CNT                                 = 0x21,
+           BLOCK_CONTEXT_DONE                            = 0x22,
+           CR_CONTEXT_DONE                               = 0x23,
+           VGT_FLUSH                                     = 0x24,
+           CR_DONE_TS                                    = 0x25,
+           SQ_NON_EVENT                                  = 0x26,
+           SC_SEND_DB_VPZ                                = 0x27,
+           BOTTOM_OF_PIPE_TS                             = 0x28,
+           DB_CACHE_FLUSH_AND_INV                        = 0x2a,
+       ADDRESS_HI_mask                                   = 0xff << 19,
+       ADDRESS_HI_shift                                  = 19,
+       EXTENDED_EVENT_bit                                = 1 << 27,
+    VGT_MULTI_PRIM_IB_RESET_EN                            = 0x00028a94,
+       RESET_EN_bit                                      = 1 << 0,
+    VGT_INSTANCE_STEP_RATE_0                              = 0x00028aa0,
+    VGT_INSTANCE_STEP_RATE_1                              = 0x00028aa4,
+    VGT_STRMOUT_EN                                        = 0x00028ab0,
+       STREAMOUT_bit                                     = 1 << 0,
+    VGT_REUSE_OFF                                         = 0x00028ab4,
+       REUSE_OFF_bit                                     = 1 << 0,
+    VGT_VTX_CNT_EN                                        = 0x00028ab8,
+       VTX_CNT_EN_bit                                    = 1 << 0,
+    VGT_STRMOUT_BUFFER_SIZE_0                             = 0x00028ad0,
+    VGT_STRMOUT_VTX_STRIDE_0                              = 0x00028ad4,
+       VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask             = 0x3ff << 0,
+       VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_0                             = 0x00028ad8,
+    VGT_STRMOUT_BUFFER_OFFSET_0                           = 0x00028adc,
+    VGT_STRMOUT_BUFFER_SIZE_1                             = 0x00028ae0,
+    VGT_STRMOUT_VTX_STRIDE_1                              = 0x00028ae4,
+       VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask             = 0x3ff << 0,
+       VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_1                             = 0x00028ae8,
+    VGT_STRMOUT_BUFFER_OFFSET_1                           = 0x00028aec,
+    VGT_STRMOUT_BUFFER_SIZE_2                             = 0x00028af0,
+    VGT_STRMOUT_VTX_STRIDE_2                              = 0x00028af4,
+       VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask             = 0x3ff << 0,
+       VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_2                             = 0x00028af8,
+    VGT_STRMOUT_BUFFER_OFFSET_2                           = 0x00028afc,
+    VGT_STRMOUT_BUFFER_SIZE_3                             = 0x00028b00,
+    VGT_STRMOUT_VTX_STRIDE_3                              = 0x00028b04,
+       VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask             = 0x3ff << 0,
+       VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_3                             = 0x00028b08,
+    VGT_STRMOUT_BUFFER_OFFSET_3                           = 0x00028b0c,
+    VGT_STRMOUT_BASE_OFFSET_0                             = 0x00028b10,
+    VGT_STRMOUT_BASE_OFFSET_1                             = 0x00028b14,
+    VGT_STRMOUT_BASE_OFFSET_2                             = 0x00028b18,
+    VGT_STRMOUT_BASE_OFFSET_3                             = 0x00028b1c,
+    VGT_STRMOUT_BUFFER_EN                                 = 0x00028b20,
+       BUFFER_0_EN_bit                                   = 1 << 0,
+       BUFFER_1_EN_bit                                   = 1 << 1,
+       BUFFER_2_EN_bit                                   = 1 << 2,
+       BUFFER_3_EN_bit                                   = 1 << 3,
+    VGT_STRMOUT_DRAW_OPAQUE_OFFSET                        = 0x00028b28,
+    VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE            = 0x00028b2c,
+    VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE                 = 0x00028b30,
+    VGT_STRMOUT_BASE_OFFSET_HI_0                          = 0x00028b44,
+       VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask    = 0x3f << 0,
+       VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_1                          = 0x00028b48,
+       VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask    = 0x3f << 0,
+       VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_2                          = 0x00028b4c,
+       VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask    = 0x3f << 0,
+       VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_3                          = 0x00028b50,
+       VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask    = 0x3f << 0,
+       VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift   = 0,
+    PA_SC_LINE_CNTL                                       = 0x00028c00,
+       BRES_CNTL_mask                                    = 0xff << 0,
+       BRES_CNTL_shift                                   = 0,
+       USE_BRES_CNTL_bit                                 = 1 << 8,
+       EXPAND_LINE_WIDTH_bit                             = 1 << 9,
+       LAST_PIXEL_bit                                    = 1 << 10,
+    PA_SC_AA_CONFIG                                       = 0x00028c04,
+       MSAA_NUM_SAMPLES_mask                             = 0x03 << 0,
+       MSAA_NUM_SAMPLES_shift                            = 0,
+       AA_MASK_CENTROID_DTMN_bit                         = 1 << 4,
+       MAX_SAMPLE_DIST_mask                              = 0x0f << 13,
+       MAX_SAMPLE_DIST_shift                             = 13,
+    PA_SU_VTX_CNTL                                        = 0x00028c08,
+       PIX_CENTER_bit                                    = 1 << 0,
+       PA_SU_VTX_CNTL__ROUND_MODE_mask                   = 0x03 << 1,
+       PA_SU_VTX_CNTL__ROUND_MODE_shift                  = 1,
+           X_TRUNCATE                                    = 0x00,
+           X_ROUND                                       = 0x01,
+           X_ROUND_TO_EVEN                               = 0x02,
+           X_ROUND_TO_ODD                                = 0x03,
+       QUANT_MODE_mask                                   = 0x07 << 3,
+       QUANT_MODE_shift                                  = 3,
+           X_1_16TH                                      = 0x00,
+           X_1_8TH                                       = 0x01,
+           X_1_4TH                                       = 0x02,
+           X_1_2                                         = 0x03,
+           X_1                                           = 0x04,
+           X_1_256TH                                     = 0x05,
+    PA_CL_GB_VERT_CLIP_ADJ                                = 0x00028c0c,
+    PA_CL_GB_VERT_DISC_ADJ                                = 0x00028c10,
+    PA_CL_GB_HORZ_CLIP_ADJ                                = 0x00028c14,
+    PA_CL_GB_HORZ_DISC_ADJ                                = 0x00028c18,
+    PA_SC_AA_SAMPLE_LOCS_MCTX                             = 0x00028c1c,
+/*     S0_X_mask                                         = 0x0f << 0, */
+/*     S0_X_shift                                        = 0, */
+/*     S0_Y_mask                                         = 0x0f << 4, */
+/*     S0_Y_shift                                        = 4, */
+/*     S1_X_mask                                         = 0x0f << 8, */
+/*     S1_X_shift                                        = 8, */
+/*     S1_Y_mask                                         = 0x0f << 12, */
+/*     S1_Y_shift                                        = 12, */
+/*     S2_X_mask                                         = 0x0f << 16, */
+/*     S2_X_shift                                        = 16, */
+/*     S2_Y_mask                                         = 0x0f << 20, */
+/*     S2_Y_shift                                        = 20, */
+/*     S3_X_mask                                         = 0x0f << 24, */
+/*     S3_X_shift                                        = 24, */
+/*     S3_Y_mask                                         = 0x0f << 28, */
+/*     S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX                      = 0x00028c20,
+/*     S4_X_mask                                         = 0x0f << 0, */
+/*     S4_X_shift                                        = 0, */
+/*     S4_Y_mask                                         = 0x0f << 4, */
+/*     S4_Y_shift                                        = 4, */
+/*     S5_X_mask                                         = 0x0f << 8, */
+/*     S5_X_shift                                        = 8, */
+/*     S5_Y_mask                                         = 0x0f << 12, */
+/*     S5_Y_shift                                        = 12, */
+/*     S6_X_mask                                         = 0x0f << 16, */
+/*     S6_X_shift                                        = 16, */
+/*     S6_Y_mask                                         = 0x0f << 20, */
+/*     S6_Y_shift                                        = 20, */
+/*     S7_X_mask                                         = 0x0f << 24, */
+/*     S7_X_shift                                        = 24, */
+/*     S7_Y_mask                                         = 0x0f << 28, */
+/*     S7_Y_shift                                        = 28, */
+    CB_CLRCMP_CONTROL                                     = 0x00028c30,
+       CLRCMP_FCN_SRC_mask                               = 0x07 << 0,
+       CLRCMP_FCN_SRC_shift                              = 0,
+           CLRCMP_DRAW_ALWAYS                            = 0x00,
+           CLRCMP_DRAW_NEVER                             = 0x01,
+           CLRCMP_DRAW_ON_NEQ                            = 0x04,
+           CLRCMP_DRAW_ON_EQ                             = 0x05,
+       CLRCMP_FCN_DST_mask                               = 0x07 << 8,
+       CLRCMP_FCN_DST_shift                              = 8,
+/*         CLRCMP_DRAW_ALWAYS                            = 0x00, */
+/*         CLRCMP_DRAW_NEVER                             = 0x01, */
+/*         CLRCMP_DRAW_ON_NEQ                            = 0x04, */
+/*         CLRCMP_DRAW_ON_EQ                             = 0x05, */
+       CLRCMP_FCN_SEL_mask                               = 0x03 << 24,
+       CLRCMP_FCN_SEL_shift                              = 24,
+           CLRCMP_SEL_DST                                = 0x00,
+           CLRCMP_SEL_SRC                                = 0x01,
+           CLRCMP_SEL_AND                                = 0x02,
+    CB_CLRCMP_SRC                                         = 0x00028c34,
+    CB_CLRCMP_DST                                         = 0x00028c38,
+    CB_CLRCMP_MSK                                         = 0x00028c3c,
+    PA_SC_AA_MASK                                         = 0x00028c48,
+    VGT_VERTEX_REUSE_BLOCK_CNTL                           = 0x00028c58,
+       VTX_REUSE_DEPTH_mask                              = 0xff << 0,
+       VTX_REUSE_DEPTH_shift                             = 0,
+    VGT_OUT_DEALLOC_CNTL                                  = 0x00028c5c,
+       DEALLOC_DIST_mask                                 = 0x7f << 0,
+       DEALLOC_DIST_shift                                = 0,
+    DB_RENDER_CONTROL                                     = 0x00028d0c,
+       DEPTH_CLEAR_ENABLE_bit                            = 1 << 0,
+       STENCIL_CLEAR_ENABLE_bit                          = 1 << 1,
+       DEPTH_COPY_bit                                    = 1 << 2,
+       STENCIL_COPY_bit                                  = 1 << 3,
+       RESUMMARIZE_ENABLE_bit                            = 1 << 4,
+       STENCIL_COMPRESS_DISABLE_bit                      = 1 << 5,
+       DEPTH_COMPRESS_DISABLE_bit                        = 1 << 6,
+       COPY_CENTROID_bit                                 = 1 << 7,
+       COPY_SAMPLE_mask                                  = 0x07 << 8,
+       COPY_SAMPLE_shift                                 = 8,
+       ZPASS_INCREMENT_DISABLE_bit                       = 1 << 11,
+    DB_RENDER_OVERRIDE                                    = 0x00028d10,
+       FORCE_HIZ_ENABLE_mask                             = 0x03 << 0,
+       FORCE_HIZ_ENABLE_shift                            = 0,
+           FORCE_OFF                                     = 0x00,
+           FORCE_ENABLE                                  = 0x01,
+           FORCE_DISABLE                                 = 0x02,
+           FORCE_RESERVED                                = 0x03,
+       FORCE_HIS_ENABLE0_mask                            = 0x03 << 2,
+       FORCE_HIS_ENABLE0_shift                           = 2,
+/*         FORCE_OFF                                     = 0x00, */
+/*         FORCE_ENABLE                                  = 0x01, */
+/*         FORCE_DISABLE                                 = 0x02, */
+/*         FORCE_RESERVED                                = 0x03, */
+       FORCE_HIS_ENABLE1_mask                            = 0x03 << 4,
+       FORCE_HIS_ENABLE1_shift                           = 4,
+/*         FORCE_OFF                                     = 0x00, */
+/*         FORCE_ENABLE                                  = 0x01, */
+/*         FORCE_DISABLE                                 = 0x02, */
+/*         FORCE_RESERVED                                = 0x03, */
+       FORCE_SHADER_Z_ORDER_bit                          = 1 << 6,
+       FAST_Z_DISABLE_bit                                = 1 << 7,
+       FAST_STENCIL_DISABLE_bit                          = 1 << 8,
+       NOOP_CULL_DISABLE_bit                             = 1 << 9,
+       FORCE_COLOR_KILL_bit                              = 1 << 10,
+       FORCE_Z_READ_bit                                  = 1 << 11,
+       FORCE_STENCIL_READ_bit                            = 1 << 12,
+       FORCE_FULL_Z_RANGE_mask                           = 0x03 << 13,
+       FORCE_FULL_Z_RANGE_shift                          = 13,
+/*         FORCE_OFF                                     = 0x00, */
+/*         FORCE_ENABLE                                  = 0x01, */
+/*         FORCE_DISABLE                                 = 0x02, */
+/*         FORCE_RESERVED                                = 0x03, */
+       FORCE_QC_SMASK_CONFLICT_bit                       = 1 << 15,
+       DISABLE_VIEWPORT_CLAMP_bit                        = 1 << 16,
+       IGNORE_SC_ZRANGE_bit                              = 1 << 17,
+    DB_HTILE_SURFACE                                      = 0x00028d24,
+       HTILE_WIDTH_bit                                   = 1 << 0,
+       HTILE_HEIGHT_bit                                  = 1 << 1,
+       LINEAR_bit                                        = 1 << 2,
+       FULL_CACHE_bit                                    = 1 << 3,
+       HTILE_USES_PRELOAD_WIN_bit                        = 1 << 4,
+       PRELOAD_bit                                       = 1 << 5,
+       PREFETCH_WIDTH_mask                               = 0x3f << 6,
+       PREFETCH_WIDTH_shift                              = 6,
+       PREFETCH_HEIGHT_mask                              = 0x3f << 12,
+       PREFETCH_HEIGHT_shift                             = 12,
+    DB_SRESULTS_COMPARE_STATE1                            = 0x00028d2c,
+       COMPAREFUNC1_mask                                 = 0x07 << 0,
+       COMPAREFUNC1_shift                                = 0,
+/*         REF_NEVER                                     = 0x00, */
+/*         REF_LESS                                      = 0x01, */
+/*         REF_EQUAL                                     = 0x02, */
+/*         REF_LEQUAL                                    = 0x03, */
+/*         REF_GREATER                                   = 0x04, */
+/*         REF_NOTEQUAL                                  = 0x05, */
+/*         REF_GEQUAL                                    = 0x06, */
+/*         REF_ALWAYS                                    = 0x07, */
+       COMPAREVALUE1_mask                                = 0xff << 4,
+       COMPAREVALUE1_shift                               = 4,
+       COMPAREMASK1_mask                                 = 0xff << 12,
+       COMPAREMASK1_shift                                = 12,
+       ENABLE1_bit                                       = 1 << 24,
+    DB_PRELOAD_CONTROL                                    = 0x00028d30,
+       START_X_mask                                      = 0xff << 0,
+       START_X_shift                                     = 0,
+       START_Y_mask                                      = 0xff << 8,
+       START_Y_shift                                     = 8,
+       MAX_X_mask                                        = 0xff << 16,
+       MAX_X_shift                                       = 16,
+       MAX_Y_mask                                        = 0xff << 24,
+       MAX_Y_shift                                       = 24,
+    DB_PREFETCH_LIMIT                                     = 0x00028d34,
+       DEPTH_HEIGHT_TILE_MAX_mask                        = 0x3ff << 0,
+       DEPTH_HEIGHT_TILE_MAX_shift                       = 0,
+    PA_SU_POLY_OFFSET_DB_FMT_CNTL                         = 0x00028df8,
+       POLY_OFFSET_NEG_NUM_DB_BITS_mask                  = 0xff << 0,
+       POLY_OFFSET_NEG_NUM_DB_BITS_shift                 = 0,
+       POLY_OFFSET_DB_IS_FLOAT_FMT_bit                   = 1 << 8,
+    PA_SU_POLY_OFFSET_CLAMP                               = 0x00028dfc,
+    PA_SU_POLY_OFFSET_FRONT_SCALE                         = 0x00028e00,
+    PA_SU_POLY_OFFSET_FRONT_OFFSET                        = 0x00028e04,
+    PA_SU_POLY_OFFSET_BACK_SCALE                          = 0x00028e08,
+    PA_SU_POLY_OFFSET_BACK_OFFSET                         = 0x00028e0c,
+    PA_CL_POINT_X_RAD                                     = 0x00028e10,
+    PA_CL_POINT_Y_RAD                                     = 0x00028e14,
+    PA_CL_POINT_SIZE                                      = 0x00028e18,
+    PA_CL_POINT_CULL_RAD                                  = 0x00028e1c,
+    PA_CL_UCP_0_X                                         = 0x00028e20,
+       PA_CL_UCP_0_X_num                                 = 6,
+       PA_CL_UCP_0_X_offset                              = 16,
+    PA_CL_UCP_0_Y                                         = 0x00028e24,
+       PA_CL_UCP_0_Y_num                                 = 6,
+       PA_CL_UCP_0_Y_offset                              = 16,
+    PA_CL_UCP_0_Z                                         = 0x00028e28,
+       PA_CL_UCP_0_Z_num                                 = 6,
+       PA_CL_UCP_0_Z_offset                              = 16,
+    SQ_ALU_CONSTANT0_0                                    = 0x00030000,
+    SQ_ALU_CONSTANT1_0                                    = 0x00030004,
+    SQ_ALU_CONSTANT2_0                                    = 0x00030008,
+    SQ_ALU_CONSTANT3_0                                    = 0x0003000c,
+    SQ_VTX_CONSTANT_WORD0_0                               = 0x00038000,
+    SQ_TEX_RESOURCE_WORD0_0                               = 0x00038000,
+       DIM_mask                                          = 0x07 << 0,
+       DIM_shift                                         = 0,
+           SQ_TEX_DIM_1D                                 = 0x00,
+           SQ_TEX_DIM_2D                                 = 0x01,
+           SQ_TEX_DIM_3D                                 = 0x02,
+           SQ_TEX_DIM_CUBEMAP                            = 0x03,
+           SQ_TEX_DIM_1D_ARRAY                           = 0x04,
+           SQ_TEX_DIM_2D_ARRAY                           = 0x05,
+           SQ_TEX_DIM_2D_MSAA                            = 0x06,
+           SQ_TEX_DIM_2D_ARRAY_MSAA                      = 0x07,
+       SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask           = 0x0f << 3,
+       SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift          = 3,
+       TILE_TYPE_bit                                     = 1 << 7,
+       PITCH_mask                                        = 0x7ff << 8,
+       PITCH_shift                                       = 8,
+       TEX_WIDTH_mask                                    = 0x1fff << 19,
+       TEX_WIDTH_shift                                   = 19,
+    SQ_VTX_CONSTANT_WORD1_0                               = 0x00038004,
+    SQ_TEX_RESOURCE_WORD1_0                               = 0x00038004,
+       TEX_HEIGHT_mask                                   = 0x1fff << 0,
+       TEX_HEIGHT_shift                                  = 0,
+       TEX_DEPTH_mask                                    = 0x1fff << 13,
+       TEX_DEPTH_shift                                   = 13,
+       SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask         = 0x3f << 26,
+       SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift        = 26,
+    SQ_VTX_CONSTANT_WORD2_0                               = 0x00038008,
+       BASE_ADDRESS_HI_mask                              = 0xff << 0,
+       BASE_ADDRESS_HI_shift                             = 0,
+       SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask              = 0x7ff << 8,
+       SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift             = 8,
+       SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit              = 1 << 19,
+       SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask         = 0x3f << 20,
+       SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift        = 20,
+       SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask      = 0x03 << 26,
+       SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift     = 26,
+/*         SQ_NUM_FORMAT_NORM                            = 0x00, */
+/*         SQ_NUM_FORMAT_INT                             = 0x01, */
+/*         SQ_NUM_FORMAT_SCALED                          = 0x02, */
+       SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit      = 1 << 28,
+       SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit         = 1 << 29,
+       SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask         = 0x03 << 30,
+       SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift        = 30,
+/*         SQ_ENDIAN_NONE                                = 0x00, */
+/*         SQ_ENDIAN_8IN16                               = 0x01, */
+/*         SQ_ENDIAN_8IN32                               = 0x02, */
+    SQ_TEX_RESOURCE_WORD2_0                               = 0x00038008,
+    SQ_VTX_CONSTANT_WORD3_0                               = 0x0003800c,
+       MEM_REQUEST_SIZE_mask                             = 0x03 << 0,
+       MEM_REQUEST_SIZE_shift                            = 0,
+    SQ_TEX_RESOURCE_WORD3_0                               = 0x0003800c,
+    SQ_TEX_RESOURCE_WORD4_0                               = 0x00038010,
+       FORMAT_COMP_X_mask                                = 0x03 << 0,
+       FORMAT_COMP_X_shift                               = 0,
+           SQ_FORMAT_COMP_UNSIGNED                       = 0x00,
+           SQ_FORMAT_COMP_SIGNED                         = 0x01,
+           SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02,
+       FORMAT_COMP_Y_mask                                = 0x03 << 2,
+       FORMAT_COMP_Y_shift                               = 2,
+/*         SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/*         SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/*         SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+       FORMAT_COMP_Z_mask                                = 0x03 << 4,
+       FORMAT_COMP_Z_shift                               = 4,
+/*         SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/*         SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/*         SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+       FORMAT_COMP_W_mask                                = 0x03 << 6,
+       FORMAT_COMP_W_shift                               = 6,
+/*         SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/*         SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/*         SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+       SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask      = 0x03 << 8,
+       SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift     = 8,
+/*         SQ_NUM_FORMAT_NORM                            = 0x00, */
+/*         SQ_NUM_FORMAT_INT                             = 0x01, */
+/*         SQ_NUM_FORMAT_SCALED                          = 0x02, */
+       SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit         = 1 << 10,
+       SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit        = 1 << 11,
+       SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask         = 0x03 << 12,
+       SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift        = 12,
+/*         SQ_ENDIAN_NONE                                = 0x00, */
+/*         SQ_ENDIAN_8IN16                               = 0x01, */
+/*         SQ_ENDIAN_8IN32                               = 0x02, */
+       REQUEST_SIZE_mask                                 = 0x03 << 14,
+       REQUEST_SIZE_shift                                = 14,
+       SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask           = 0x07 << 16,
+       SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift          = 16,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+       SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask           = 0x07 << 19,
+       SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift          = 19,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+       SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask           = 0x07 << 22,
+       SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift          = 22,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+       SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask           = 0x07 << 25,
+       SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift          = 25,
+/*         SQ_SEL_X                                      = 0x00, */
+/*         SQ_SEL_Y                                      = 0x01, */
+/*         SQ_SEL_Z                                      = 0x02, */
+/*         SQ_SEL_W                                      = 0x03, */
+/*         SQ_SEL_0                                      = 0x04, */
+/*         SQ_SEL_1                                      = 0x05, */
+       BASE_LEVEL_mask                                   = 0x0f << 28,
+       BASE_LEVEL_shift                                  = 28,
+    SQ_TEX_RESOURCE_WORD5_0                               = 0x00038014,
+       LAST_LEVEL_mask                                   = 0x0f << 0,
+       LAST_LEVEL_shift                                  = 0,
+       BASE_ARRAY_mask                                   = 0x1fff << 4,
+       BASE_ARRAY_shift                                  = 4,
+       LAST_ARRAY_mask                                   = 0x1fff << 17,
+       LAST_ARRAY_shift                                  = 17,
+    SQ_TEX_RESOURCE_WORD6_0                               = 0x00038018,
+       MPEG_CLAMP_mask                                   = 0x03 << 0,
+       MPEG_CLAMP_shift                                  = 0,
+           SQ_TEX_MPEG_CLAMP_OFF                         = 0x00,
+           SQ_TEX_MPEG_9                                 = 0x01,
+           SQ_TEX_MPEG_10                                = 0x02,
+       PERF_MODULATION_mask                              = 0x07 << 5,
+       PERF_MODULATION_shift                             = 5,
+       INTERLACED_bit                                    = 1 << 8,
+       SQ_TEX_RESOURCE_WORD6_0__TYPE_mask                = 0x03 << 30,
+       SQ_TEX_RESOURCE_WORD6_0__TYPE_shift               = 30,
+           SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00,
+           SQ_TEX_VTX_INVALID_BUFFER                     = 0x01,
+           SQ_TEX_VTX_VALID_TEXTURE                      = 0x02,
+           SQ_TEX_VTX_VALID_BUFFER                       = 0x03,
+    SQ_VTX_CONSTANT_WORD6_0                               = 0x00038018,
+       SQ_VTX_CONSTANT_WORD6_0__TYPE_mask                = 0x03 << 30,
+       SQ_VTX_CONSTANT_WORD6_0__TYPE_shift               = 30,
+/*         SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00, */
+/*         SQ_TEX_VTX_INVALID_BUFFER                     = 0x01, */
+/*         SQ_TEX_VTX_VALID_TEXTURE                      = 0x02, */
+/*         SQ_TEX_VTX_VALID_BUFFER                       = 0x03, */
+    SQ_TEX_SAMPLER_WORD0_0                                = 0x0003c000,
+       SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask              = 0x07 << 0,
+       SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift             = 0,
+           SQ_TEX_WRAP                                   = 0x00,
+           SQ_TEX_MIRROR                                 = 0x01,
+           SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02,
+           SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03,
+           SQ_TEX_CLAMP_HALF_BORDER                      = 0x04,
+           SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05,
+           SQ_TEX_CLAMP_BORDER                           = 0x06,
+           SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07,
+       CLAMP_Y_mask                                      = 0x07 << 3,
+       CLAMP_Y_shift                                     = 3,
+/*         SQ_TEX_WRAP                                   = 0x00, */
+/*         SQ_TEX_MIRROR                                 = 0x01, */
+/*         SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */
+/*         SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */
+/*         SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */
+/*         SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */
+/*         SQ_TEX_CLAMP_BORDER                           = 0x06, */
+/*         SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */
+       CLAMP_Z_mask                                      = 0x07 << 6,
+       CLAMP_Z_shift                                     = 6,
+/*         SQ_TEX_WRAP                                   = 0x00, */
+/*         SQ_TEX_MIRROR                                 = 0x01, */
+/*         SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */
+/*         SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */
+/*         SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */
+/*         SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */
+/*         SQ_TEX_CLAMP_BORDER                           = 0x06, */
+/*         SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */
+       XY_MAG_FILTER_mask                                = 0x07 << 9,
+       XY_MAG_FILTER_shift                               = 9,
+           SQ_TEX_XY_FILTER_POINT                        = 0x00,
+           SQ_TEX_XY_FILTER_BILINEAR                     = 0x01,
+           SQ_TEX_XY_FILTER_BICUBIC                      = 0x02,
+       XY_MIN_FILTER_mask                                = 0x07 << 12,
+       XY_MIN_FILTER_shift                               = 12,
+/*         SQ_TEX_XY_FILTER_POINT                        = 0x00, */
+/*         SQ_TEX_XY_FILTER_BILINEAR                     = 0x01, */
+/*         SQ_TEX_XY_FILTER_BICUBIC                      = 0x02, */
+       Z_FILTER_mask                                     = 0x03 << 15,
+       Z_FILTER_shift                                    = 15,
+           SQ_TEX_Z_FILTER_NONE                          = 0x00,
+           SQ_TEX_Z_FILTER_POINT                         = 0x01,
+           SQ_TEX_Z_FILTER_LINEAR                        = 0x02,
+       MIP_FILTER_mask                                   = 0x03 << 17,
+       MIP_FILTER_shift                                  = 17,
+/*         SQ_TEX_Z_FILTER_NONE                          = 0x00, */
+/*         SQ_TEX_Z_FILTER_POINT                         = 0x01, */
+/*         SQ_TEX_Z_FILTER_LINEAR                        = 0x02, */
+       BORDER_COLOR_TYPE_mask                            = 0x03 << 22,
+       BORDER_COLOR_TYPE_shift                           = 22,
+           SQ_TEX_BORDER_COLOR_TRANS_BLACK               = 0x00,
+           SQ_TEX_BORDER_COLOR_OPAQUE_BLACK              = 0x01,
+           SQ_TEX_BORDER_COLOR_OPAQUE_WHITE              = 0x02,
+           SQ_TEX_BORDER_COLOR_REGISTER                  = 0x03,
+       POINT_SAMPLING_CLAMP_bit                          = 1 << 24,
+       TEX_ARRAY_OVERRIDE_bit                            = 1 << 25,
+       DEPTH_COMPARE_FUNCTION_mask                       = 0x07 << 26,
+       DEPTH_COMPARE_FUNCTION_shift                      = 26,
+           SQ_TEX_DEPTH_COMPARE_NEVER                    = 0x00,
+           SQ_TEX_DEPTH_COMPARE_LESS                     = 0x01,
+           SQ_TEX_DEPTH_COMPARE_EQUAL                    = 0x02,
+           SQ_TEX_DEPTH_COMPARE_LESSEQUAL                = 0x03,
+           SQ_TEX_DEPTH_COMPARE_GREATER                  = 0x04,
+           SQ_TEX_DEPTH_COMPARE_NOTEQUAL                 = 0x05,
+           SQ_TEX_DEPTH_COMPARE_GREATEREQUAL             = 0x06,
+           SQ_TEX_DEPTH_COMPARE_ALWAYS                   = 0x07,
+       CHROMA_KEY_mask                                   = 0x03 << 29,
+       CHROMA_KEY_shift                                  = 29,
+           SQ_TEX_CHROMA_KEY_DISABLED                    = 0x00,
+           SQ_TEX_CHROMA_KEY_KILL                        = 0x01,
+           SQ_TEX_CHROMA_KEY_BLEND                       = 0x02,
+       LOD_USES_MINOR_AXIS_bit                           = 1 << 31,
+    SQ_TEX_SAMPLER_WORD1_0                                = 0x0003c004,
+       MIN_LOD_mask                                      = 0x3ff << 0,
+       MIN_LOD_shift                                     = 0,
+       MAX_LOD_mask                                      = 0x3ff << 10,
+       MAX_LOD_shift                                     = 10,
+       SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask             = 0xfff << 20,
+       SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift            = 20,
+    SQ_TEX_SAMPLER_WORD2_0                                = 0x0003c008,
+       LOD_BIAS_SEC_mask                                 = 0xfff << 0,
+       LOD_BIAS_SEC_shift                                = 0,
+       MC_COORD_TRUNCATE_bit                             = 1 << 12,
+       SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit         = 1 << 13,
+       HIGH_PRECISION_FILTER_bit                         = 1 << 14,
+       PERF_MIP_mask                                     = 0x07 << 15,
+       PERF_MIP_shift                                    = 15,
+       PERF_Z_mask                                       = 0x03 << 18,
+       PERF_Z_shift                                      = 18,
+       FETCH_4_bit                                       = 1 << 26,
+       SAMPLE_IS_PCF_bit                                 = 1 << 27,
+       SQ_TEX_SAMPLER_WORD2_0__TYPE_bit                  = 1 << 31,
+    SQ_VTX_BASE_VTX_LOC                                   = 0x0003cff0,
+    SQ_VTX_START_INST_LOC                                 = 0x0003cff4,
+    SQ_LOOP_CONST_DX10_0                                  = 0x0003e200,
+    SQ_LOOP_CONST_0                                       = 0x0003e200,
+       SQ_LOOP_CONST_0__COUNT_mask                       = 0xfff << 0,
+       SQ_LOOP_CONST_0__COUNT_shift                      = 0,
+       INIT_mask                                         = 0xfff << 12,
+       INIT_shift                                        = 12,
+       INC_mask                                          = 0xff << 24,
+       INC_shift                                         = 24,
+    SQ_BOOL_CONST_0                                       = 0x0003e380,
+       SQ_BOOL_CONST_0_num                               = 3,
+
+} ;
+
+#endif /* _AUTOREGS */
+
diff --git a/src/mesa/drivers/dri/r600/r600_reg_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h
new file mode 100644 (file)
index 0000000..f7702c4
--- /dev/null
@@ -0,0 +1,492 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009  Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R6xx_H_
+#define _R600_REG_R6xx_H_
+
+/*
+ * Registers for R6xx chips that are not documented yet
+ */
+
+enum {
+    /* Convention: FOO_mask == (field-width bits) << FOO_shift; FOO_bit is a single-bit flag. */
+    MM_INDEX                                              = 0x0000,
+    MM_DATA                                               = 0x0004,
+
+    SRBM_STATUS                                           = 0x0e50,
+       RLC_RQ_PENDING_bit                                = 1 << 3,
+       RCU_RQ_PENDING_bit                                = 1 << 4,
+       GRBM_RQ_PENDING_bit                               = 1 << 5,
+       HI_RQ_PENDING_bit                                 = 1 << 6,
+       IO_EXTERN_SIGNAL_bit                              = 1 << 7,
+       VMC_BUSY_bit                                      = 1 << 8,
+       MCB_BUSY_bit                                      = 1 << 9,
+       MCDZ_BUSY_bit                                     = 1 << 10,
+       MCDY_BUSY_bit                                     = 1 << 11,
+       MCDX_BUSY_bit                                     = 1 << 12,
+       MCDW_BUSY_bit                                     = 1 << 13,
+       SEM_BUSY_bit                                      = 1 << 14,
+       SRBM_STATUS__RLC_BUSY_bit                         = 1 << 15,
+       PDMA_BUSY_bit                                     = 1 << 16,
+       IH_BUSY_bit                                       = 1 << 17,
+       CSC_BUSY_bit                                      = 1 << 20,
+       CMC7_BUSY_bit                                     = 1 << 21,
+       CMC6_BUSY_bit                                     = 1 << 22,
+       CMC5_BUSY_bit                                     = 1 << 23,
+       CMC4_BUSY_bit                                     = 1 << 24,
+       CMC3_BUSY_bit                                     = 1 << 25,
+       CMC2_BUSY_bit                                     = 1 << 26,
+       CMC1_BUSY_bit                                     = 1 << 27,
+       CMC0_BUSY_bit                                     = 1 << 28,
+       BIF_BUSY_bit                                      = 1 << 29,
+       IDCT_BUSY_bit                                     = 1 << 30,
+
+    SRBM_READ_ERROR                                       = 0x0e98,
+       READ_ADDRESS_mask                                 = 0xffff << 2,
+       READ_ADDRESS_shift                                = 2,
+       READ_REQUESTER_HI_bit                             = 1 << 24,
+       READ_REQUESTER_GRBM_bit                           = 1 << 25,
+       READ_REQUESTER_RCU_bit                            = 1 << 26,
+       READ_REQUESTER_RLC_bit                            = 1 << 27,
+       READ_ERROR_bit                                    = 1 << 31,
+
+    SRBM_INT_STATUS                                       = 0x0ea4,
+       RDERR_INT_STAT_bit                                = 1 << 0,
+       GFX_CNTX_SWITCH_INT_STAT_bit                      = 1 << 1,
+    SRBM_INT_ACK                                          = 0x0ea8,
+       RDERR_INT_ACK_bit                                 = 1 << 0,
+       GFX_CNTX_SWITCH_INT_ACK_bit                       = 1 << 1,
+
+    R6XX_MC_VM_FB_LOCATION                                = 0x2180,
+
+    VENDOR_DEVICE_ID                                      = 0x4000,
+
+    D1GRPH_PRIMARY_SURFACE_ADDRESS                        = 0x6110,
+    D1GRPH_PITCH                                          = 0x6120,
+    D1GRPH_Y_END                                          = 0x6138,
+
+    GRBM_STATUS                                           = 0x8010,
+       CMDFIFO_AVAIL_mask                                = 0x1f << 0,
+       CMDFIFO_AVAIL_shift                               = 0,
+       SRBM_RQ_PENDING_bit                               = 1 << 5,
+       CP_RQ_PENDING_bit                                 = 1 << 6,
+       CF_RQ_PENDING_bit                                 = 1 << 7,
+       PF_RQ_PENDING_bit                                 = 1 << 8,
+       GRBM_EE_BUSY_bit                                  = 1 << 10,
+       GRBM_STATUS__VC_BUSY_bit                          = 1 << 11,
+       DB03_CLEAN_bit                                    = 1 << 12,
+       CB03_CLEAN_bit                                    = 1 << 13,
+       VGT_BUSY_NO_DMA_bit                               = 1 << 16,
+       GRBM_STATUS__VGT_BUSY_bit                         = 1 << 17,
+       TA03_BUSY_bit                                     = 1 << 18,
+       GRBM_STATUS__TC_BUSY_bit                          = 1 << 19,
+       SX_BUSY_bit                                       = 1 << 20,
+       SH_BUSY_bit                                       = 1 << 21,
+       SPI03_BUSY_bit                                    = 1 << 22,
+       SMX_BUSY_bit                                      = 1 << 23,
+       SC_BUSY_bit                                       = 1 << 24,
+       PA_BUSY_bit                                       = 1 << 25,
+       DB03_BUSY_bit                                     = 1 << 26,
+       CR_BUSY_bit                                       = 1 << 27,
+       CP_COHERENCY_BUSY_bit                             = 1 << 28,
+       GRBM_STATUS__CP_BUSY_bit                          = 1 << 29,
+       CB03_BUSY_bit                                     = 1 << 30,
+       GUI_ACTIVE_bit                                    = 1 << 31,
+    GRBM_STATUS2                                          = 0x8014,
+       CR_CLEAN_bit                                      = 1 << 0,
+       SMX_CLEAN_bit                                     = 1 << 1,
+       SPI0_BUSY_bit                                     = 1 << 8,
+       SPI1_BUSY_bit                                     = 1 << 9,
+       SPI2_BUSY_bit                                     = 1 << 10,
+       SPI3_BUSY_bit                                     = 1 << 11,
+       TA0_BUSY_bit                                      = 1 << 12,
+       TA1_BUSY_bit                                      = 1 << 13,
+       TA2_BUSY_bit                                      = 1 << 14,
+       TA3_BUSY_bit                                      = 1 << 15,
+       DB0_BUSY_bit                                      = 1 << 16,
+       DB1_BUSY_bit                                      = 1 << 17,
+       DB2_BUSY_bit                                      = 1 << 18,
+       DB3_BUSY_bit                                      = 1 << 19,
+       CB0_BUSY_bit                                      = 1 << 20,
+       CB1_BUSY_bit                                      = 1 << 21,
+       CB2_BUSY_bit                                      = 1 << 22,
+       CB3_BUSY_bit                                      = 1 << 23,
+    GRBM_SOFT_RESET                                       = 0x8020,
+       SOFT_RESET_CP_bit                                 = 1 << 0,
+       SOFT_RESET_CB_bit                                 = 1 << 1,
+       SOFT_RESET_CR_bit                                 = 1 << 2,
+       SOFT_RESET_DB_bit                                 = 1 << 3,
+       SOFT_RESET_PA_bit                                 = 1 << 5,
+       SOFT_RESET_SC_bit                                 = 1 << 6,
+       SOFT_RESET_SMX_bit                                = 1 << 7,
+       SOFT_RESET_SPI_bit                                = 1 << 8,
+       SOFT_RESET_SH_bit                                 = 1 << 9,
+       SOFT_RESET_SX_bit                                 = 1 << 10,
+       SOFT_RESET_TC_bit                                 = 1 << 11,
+       SOFT_RESET_TA_bit                                 = 1 << 12,
+       SOFT_RESET_VC_bit                                 = 1 << 13,
+       SOFT_RESET_VGT_bit                                = 1 << 14,
+       SOFT_RESET_GRBM_GCA_bit                           = 1 << 15,
+
+    WAIT_UNTIL                                            = 0x8040,
+       WAIT_CP_DMA_IDLE_bit                              = 1 << 8,
+       WAIT_CMDFIFO_bit                                  = 1 << 10,
+       WAIT_2D_IDLE_bit                                  = 1 << 14,
+       WAIT_3D_IDLE_bit                                  = 1 << 15,
+       WAIT_2D_IDLECLEAN_bit                             = 1 << 16,
+       WAIT_3D_IDLECLEAN_bit                             = 1 << 17,
+       WAIT_EXTERN_SIG_bit                               = 1 << 19,
+       CMDFIFO_ENTRIES_mask                              = 0x1f << 20,
+       CMDFIFO_ENTRIES_shift                             = 20,
+
+    GRBM_READ_ERROR                                       = 0x8058,
+/*     READ_ADDRESS_mask                                 = 0xffff << 2, */
+/*     READ_ADDRESS_shift                                = 2, */
+       READ_REQUESTER_SRBM_bit                           = 1 << 28,
+       READ_REQUESTER_CP_bit                             = 1 << 29,
+       READ_REQUESTER_WU_POLL_bit                        = 1 << 30,
+/*     READ_ERROR_bit                                    = 1 << 31, */
+
+    SCRATCH_REG0                                         = 0x8500,
+    SCRATCH_REG1                                         = 0x8504,
+    SCRATCH_REG2                                         = 0x8508,
+    SCRATCH_REG3                                         = 0x850c,
+    SCRATCH_REG4                                         = 0x8510,
+    SCRATCH_REG5                                         = 0x8514,
+    SCRATCH_REG6                                         = 0x8518,
+    SCRATCH_REG7                                         = 0x851c,
+    SCRATCH_UMSK                                         = 0x8540,
+    SCRATCH_ADDR                                         = 0x8544,
+
+    CP_COHER_CNTL                                         = 0x85f0,
+       DEST_BASE_0_ENA_bit                               = 1 << 0,
+       DEST_BASE_1_ENA_bit                               = 1 << 1,
+       SO0_DEST_BASE_ENA_bit                             = 1 << 2,
+       SO1_DEST_BASE_ENA_bit                             = 1 << 3,
+       SO2_DEST_BASE_ENA_bit                             = 1 << 4,
+       SO3_DEST_BASE_ENA_bit                             = 1 << 5,
+       CB0_DEST_BASE_ENA_bit                             = 1 << 6,
+       CB1_DEST_BASE_ENA_bit                             = 1 << 7,
+       CB2_DEST_BASE_ENA_bit                             = 1 << 8,
+       CB3_DEST_BASE_ENA_bit                             = 1 << 9,
+       CB4_DEST_BASE_ENA_bit                             = 1 << 10,
+       CB5_DEST_BASE_ENA_bit                             = 1 << 11,
+       CB6_DEST_BASE_ENA_bit                             = 1 << 12,
+       CB7_DEST_BASE_ENA_bit                             = 1 << 13,
+       DB_DEST_BASE_ENA_bit                              = 1 << 14,
+       CR_DEST_BASE_ENA_bit                              = 1 << 15,
+       TC_ACTION_ENA_bit                                 = 1 << 23,
+       VC_ACTION_ENA_bit                                 = 1 << 24,
+       CB_ACTION_ENA_bit                                 = 1 << 25,
+       DB_ACTION_ENA_bit                                 = 1 << 26,
+       SH_ACTION_ENA_bit                                 = 1 << 27,
+       SMX_ACTION_ENA_bit                                = 1 << 28,
+       CR0_ACTION_ENA_bit                                = 1 << 29,
+       CR1_ACTION_ENA_bit                                = 1 << 30,
+       CR2_ACTION_ENA_bit                                = 1 << 31,
+    CP_COHER_SIZE                                         = 0x85f4,
+    CP_COHER_BASE                                         = 0x85f8,
+    CP_COHER_STATUS                                       = 0x85fc,
+       MATCHING_GFX_CNTX_mask                            = 0xff << 0,
+       MATCHING_GFX_CNTX_shift                           = 0,
+       MATCHING_CR_CNTX_mask                             = 0xffff << 8,
+       MATCHING_CR_CNTX_shift                            = 8,
+       STATUS_bit                                        = 1 << 31,
+
+    CP_STALLED_STAT1                                      = 0x8674,
+       RBIU_TO_DMA_NOT_RDY_TO_RCV_bit                    = 1 << 0,
+       RBIU_TO_IBS_NOT_RDY_TO_RCV_bit                    = 1 << 1,
+       RBIU_TO_SEM_NOT_RDY_TO_RCV_bit                    = 1 << 2,
+       RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit                 = 1 << 3,
+       RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit                  = 1 << 4,
+       RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit                  = 1 << 5,
+       RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit                   = 1 << 6,
+       RBIU_TO_RECT_NOT_RDY_TO_RCV_bit                   = 1 << 7,
+       RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit                  = 1 << 8,
+       RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit                  = 1 << 9,
+       MIU_WAITING_ON_RDREQ_FREE_bit                     = 1 << 16,
+       MIU_WAITING_ON_WRREQ_FREE_bit                     = 1 << 17,
+       MIU_NEEDS_AVAIL_WRREQ_PHASE_bit                   = 1 << 18,
+       RCIU_WAITING_ON_GRBM_FREE_bit                     = 1 << 24,
+       RCIU_WAITING_ON_VGT_FREE_bit                      = 1 << 25,
+       RCIU_STALLED_ON_ME_READ_bit                       = 1 << 26,
+       RCIU_STALLED_ON_DMA_READ_bit                      = 1 << 27,
+       RCIU_HALTED_BY_REG_VIOLATION_bit                  = 1 << 28,
+    CP_STALLED_STAT2                                      = 0x8678,
+       PFP_TO_CSF_NOT_RDY_TO_RCV_bit                     = 1 << 0,
+       PFP_TO_MEQ_NOT_RDY_TO_RCV_bit                     = 1 << 1,
+       PFP_TO_VGT_NOT_RDY_TO_RCV_bit                     = 1 << 2,
+       PFP_HALTED_BY_INSTR_VIOLATION_bit                 = 1 << 3,
+       MULTIPASS_IB_PENDING_IN_PFP_bit                   = 1 << 4,
+       ME_BRUSH_WC_NOT_RDY_TO_RCV_bit                    = 1 << 8,
+       ME_STALLED_ON_BRUSH_LOGIC_bit                     = 1 << 9,
+       CR_CNTX_NOT_AVAIL_TO_ME_bit                       = 1 << 10,
+       GFX_CNTX_NOT_AVAIL_TO_ME_bit                      = 1 << 11,
+       ME_RCIU_NOT_RDY_TO_RCV_bit                        = 1 << 12,
+       ME_TO_CONST_NOT_RDY_TO_RCV_bit                    = 1 << 13,
+       ME_WAITING_DATA_FROM_PFP_bit                      = 1 << 14,
+       ME_WAITING_ON_PARTIAL_FLUSH_bit                   = 1 << 15,
+       RECT_FIFO_NEEDS_CR_RECT_DONE_bit                  = 1 << 16,
+       RECT_FIFO_NEEDS_WR_CONFIRM_bit                    = 1 << 17,
+       EOPD_FIFO_NEEDS_SC_EOP_DONE_bit                   = 1 << 18,
+       EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit                  = 1 << 19,
+       EOPD_FIFO_NEEDS_WR_CONFIRM_bit                    = 1 << 20,
+       EOPD_FIFO_NEEDS_SIGNAL_SEM_bit                    = 1 << 21,
+       SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit                  = 1 << 22,
+       SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit                 = 1 << 23,
+       PIPE_STATS_FIFO_NEEDS_SAMPLE_bit                  = 1 << 24,
+       SURF_SYNC_NEEDS_IDLE_CNTXS_bit                    = 1 << 30,
+       SURF_SYNC_NEEDS_ALL_CLEAN_bit                     = 1 << 31,
+    CP_BUSY_STAT                                          = 0x867c,
+       REG_BUS_FIFO_BUSY_bit                             = 1 << 0,
+       RING_FETCHING_DATA_bit                            = 1 << 1,
+       INDR1_FETCHING_DATA_bit                           = 1 << 2,
+       INDR2_FETCHING_DATA_bit                           = 1 << 3,
+       STATE_FETCHING_DATA_bit                           = 1 << 4,
+       PRED_FETCHING_DATA_bit                            = 1 << 5,
+       COHER_CNTR_NEQ_ZERO_bit                           = 1 << 6,
+       PFP_PARSING_PACKETS_bit                           = 1 << 7,
+       ME_PARSING_PACKETS_bit                            = 1 << 8,
+       RCIU_PFP_BUSY_bit                                 = 1 << 9,
+       RCIU_ME_BUSY_bit                                  = 1 << 10,
+       OUTSTANDING_READ_TAGS_bit                         = 1 << 11,
+       SEM_CMDFIFO_NOT_EMPTY_bit                         = 1 << 12,
+       SEM_FAILED_AND_HOLDING_bit                        = 1 << 13,
+       SEM_POLLING_FOR_PASS_bit                          = 1 << 14,
+       _3D_BUSY_bit                                      = 1 << 15,
+       _2D_BUSY_bit                                      = 1 << 16,
+    CP_STAT                                               = 0x8680,
+       CSF_RING_BUSY_bit                                 = 1 << 0,
+       CSF_WPTR_POLL_BUSY_bit                            = 1 << 1,
+       CSF_INDIRECT1_BUSY_bit                            = 1 << 2,
+       CSF_INDIRECT2_BUSY_bit                            = 1 << 3,
+       CSF_STATE_BUSY_bit                                = 1 << 4,
+       CSF_PREDICATE_BUSY_bit                            = 1 << 5,
+       CSF_BUSY_bit                                      = 1 << 6,
+       MIU_RDREQ_BUSY_bit                                = 1 << 7,
+       MIU_WRREQ_BUSY_bit                                = 1 << 8,
+       ROQ_RING_BUSY_bit                                 = 1 << 9,
+       ROQ_INDIRECT1_BUSY_bit                            = 1 << 10,
+       ROQ_INDIRECT2_BUSY_bit                            = 1 << 11,
+       ROQ_STATE_BUSY_bit                                = 1 << 12,
+       ROQ_PREDICATE_BUSY_bit                            = 1 << 13,
+       ROQ_ALIGN_BUSY_bit                                = 1 << 14,
+       PFP_BUSY_bit                                      = 1 << 15,
+       MEQ_BUSY_bit                                      = 1 << 16,
+       ME_BUSY_bit                                       = 1 << 17,
+       QUERY_BUSY_bit                                    = 1 << 18,
+       SEMAPHORE_BUSY_bit                                = 1 << 19,
+       INTERRUPT_BUSY_bit                                = 1 << 20,
+       SURFACE_SYNC_BUSY_bit                             = 1 << 21,
+       DMA_BUSY_bit                                      = 1 << 22,
+       RCIU_BUSY_bit                                     = 1 << 23,
+       CP_STAT__CP_BUSY_bit                              = 1 << 31,
+
+    CP_ME_CNTL                                            = 0x86d8,
+       ME_STATMUX_mask                                   = 0xff << 0,
+       ME_STATMUX_shift                                  = 0,
+       ME_HALT_bit                                       = 1 << 28,
+    CP_ME_STATUS                                          = 0x86dc,
+
+    CP_RB_RPTR                                            = 0x8700,
+       RB_RPTR_mask                                      = 0xfffff << 0,
+       RB_RPTR_shift                                     = 0,
+    CP_RB_WPTR_DELAY                                      = 0x8704,
+       PRE_WRITE_TIMER_mask                              = 0xfffffff << 0,
+       PRE_WRITE_TIMER_shift                             = 0,
+       PRE_WRITE_LIMIT_mask                              = 0x0f << 28,
+       PRE_WRITE_LIMIT_shift                             = 28,
+
+    CP_ROQ_RB_STAT                                        = 0x8780,
+       ROQ_RPTR_PRIMARY_mask                             = 0x3ff << 0,
+       ROQ_RPTR_PRIMARY_shift                            = 0,
+       ROQ_WPTR_PRIMARY_mask                             = 0x3ff << 16,
+       ROQ_WPTR_PRIMARY_shift                            = 16,
+    CP_ROQ_IB1_STAT                                       = 0x8784,
+       ROQ_RPTR_INDIRECT1_mask                           = 0x3ff << 0,
+       ROQ_RPTR_INDIRECT1_shift                          = 0,
+       ROQ_WPTR_INDIRECT1_mask                           = 0x3ff << 16,
+       ROQ_WPTR_INDIRECT1_shift                          = 16,
+    CP_ROQ_IB2_STAT                                       = 0x8788,
+       ROQ_RPTR_INDIRECT2_mask                           = 0x3ff << 0,
+       ROQ_RPTR_INDIRECT2_shift                          = 0,
+       ROQ_WPTR_INDIRECT2_mask                           = 0x3ff << 16,
+       ROQ_WPTR_INDIRECT2_shift                          = 16,
+
+    CP_MEQ_STAT                                           = 0x8794,
+       MEQ_RPTR_mask                                     = 0x3ff << 0,
+       MEQ_RPTR_shift                                    = 0,
+       MEQ_WPTR_mask                                     = 0x3ff << 16,
+       MEQ_WPTR_shift                                    = 16,
+
+    CC_GC_SHADER_PIPE_CONFIG                              = 0x8950,
+       INACTIVE_QD_PIPES_mask                            = 0xff << 8,
+       INACTIVE_QD_PIPES_shift                           = 8,
+           R6XX_MAX_QD_PIPES                             = 8,
+       INACTIVE_SIMDS_mask                               = 0xff << 16,
+       INACTIVE_SIMDS_shift                              = 16,
+           R6XX_MAX_SIMDS                                = 8,
+    GC_USER_SHADER_PIPE_CONFIG                            = 0x8954,
+
+    VC_ENHANCE                                            = 0x9714,
+    DB_DEBUG                                              = 0x9830,
+        PREZ_MUST_WAIT_FOR_POSTZ_DONE                     = 1 << 31,
+
+    DB_WATERMARKS                                         = 0x00009838,
+       DEPTH_FREE_mask                                   = 0x1f << 0,
+       DEPTH_FREE_shift                                  = 0,
+       DEPTH_FLUSH_mask                                  = 0x3f << 5,
+       DEPTH_FLUSH_shift                                 = 5,
+       FORCE_SUMMARIZE_mask                              = 0x0f << 11,
+       FORCE_SUMMARIZE_shift                             = 11,
+       DEPTH_PENDING_FREE_mask                           = 0x1f << 15,
+       DEPTH_PENDING_FREE_shift                          = 15,
+       DEPTH_CACHELINE_FREE_mask                         = 0x1f << 20,
+       DEPTH_CACHELINE_FREE_shift                        = 20,
+       EARLY_Z_PANIC_DISABLE_bit                         = 1 << 25,
+       LATE_Z_PANIC_DISABLE_bit                          = 1 << 26,
+       RE_Z_PANIC_DISABLE_bit                            = 1 << 27,
+       DB_EXTRA_DEBUG_mask                               = 0x0f << 28,
+       DB_EXTRA_DEBUG_shift                              = 28,
+
+    CP_RB_BASE                                            = 0xc100,
+    CP_RB_CNTL                                            = 0xc104,
+        RB_BUFSZ_mask                                     = 0x3f << 0,
+    CP_RB_WPTR                                            = 0xc114,
+       RB_WPTR_mask                                      = 0xfffff << 0,
+       RB_WPTR_shift                                     = 0,
+    CP_RB_RPTR_WR                                         = 0xc108,
+       RB_RPTR_WR_mask                                   = 0xfffff << 0,
+       RB_RPTR_WR_shift                                  = 0,
+
+    CP_INT_STATUS                                         = 0xc128,
+       DISABLE_CNTX_SWITCH_INT_STAT_bit                  = 1 << 0,
+       ENABLE_CNTX_SWITCH_INT_STAT_bit                   = 1 << 1,
+       SEM_SIGNAL_INT_STAT_bit                           = 1 << 18,
+       CNTX_BUSY_INT_STAT_bit                            = 1 << 19,
+       CNTX_EMPTY_INT_STAT_bit                           = 1 << 20,
+       WAITMEM_SEM_INT_STAT_bit                          = 1 << 21,
+       PRIV_INSTR_INT_STAT_bit                           = 1 << 22,
+       PRIV_REG_INT_STAT_bit                             = 1 << 23,
+       OPCODE_ERROR_INT_STAT_bit                         = 1 << 24,
+       SCRATCH_INT_STAT_bit                              = 1 << 25,
+       TIME_STAMP_INT_STAT_bit                           = 1 << 26,
+       RESERVED_BIT_ERROR_INT_STAT_bit                   = 1 << 27,
+       DMA_INT_STAT_bit                                  = 1 << 28,
+       IB2_INT_STAT_bit                                  = 1 << 29,
+       IB1_INT_STAT_bit                                  = 1 << 30,
+       RB_INT_STAT_bit                                   = 1 << 31,
+
+//  SX_ALPHA_TEST_CONTROL                                 = 0x00028410,
+       ALPHA_FUNC__REF_NEVER                             = 0,
+       ALPHA_FUNC__REF_ALWAYS                            = 7,
+//  DB_SHADER_CONTROL                                     = 0x0002880c,
+       Z_ORDER__EARLY_Z_THEN_LATE_Z                      = 2,
+//  PA_SU_SC_MODE_CNTL                                    = 0x00028814,
+//     POLY_MODE_mask                                    = 0x03 << 3,
+       POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE,
+//     POLYMODE_FRONT_PTYPE_mask                         = 0x07 << 5,
+       POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES,
+    PA_SC_AA_SAMPLE_LOCS_8S_WD1_M                         = 0x00028c20,
+    DB_SRESULTS_COMPARE_STATE0                            = 0x00028d28,        /* See autoregs: DB_SRESULTS_COMPARE_STATE1 */
+//  DB_SRESULTS_COMPARE_STATE1                            = 0x00028d2c,
+    DB_ALPHA_TO_MASK                                      = 0x00028d44,        /* 2-bit OFFSETn fields at bits 8, 10, 12, 14 */
+       ALPHA_TO_MASK_ENABLE                              = 1 << 0,
+       ALPHA_TO_MASK_OFFSET0_mask                        = 0x03 << 8,
+       ALPHA_TO_MASK_OFFSET0_shift                       = 8,
+       ALPHA_TO_MASK_OFFSET1_mask                        = 0x03 << 10,
+       ALPHA_TO_MASK_OFFSET1_shift                       = 10,
+       ALPHA_TO_MASK_OFFSET2_mask                        = 0x03 << 12,
+       ALPHA_TO_MASK_OFFSET2_shift                       = 12,
+       ALPHA_TO_MASK_OFFSET3_mask                        = 0x03 << 14,
+       ALPHA_TO_MASK_OFFSET3_shift                       = 14,
+
+//  SQ_VTX_CONSTANT_WORD2_0                               = 0x00038008,
+//     SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask         = 0x3f << 20,
+       FMT_INVALID=0,      FMT_8,          FMT_4_4,            FMT_3_3_2,
+                           FMT_16=5,       FMT_16_FLOAT,       FMT_8_8,
+       FMT_5_6_5,          FMT_6_5_5,      FMT_1_5_5_5,        FMT_4_4_4_4,
+       FMT_5_5_5_1,        FMT_32,         FMT_32_FLOAT,       FMT_16_16,
+       FMT_16_16_FLOAT=16, FMT_8_24,       FMT_8_24_FLOAT,     FMT_24_8,
+       FMT_24_8_FLOAT,     FMT_10_11_11,   FMT_10_11_11_FLOAT, FMT_11_11_10,
+       FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8,        FMT_10_10_10_2,
+       FMT_X24_8_32_FLOAT, FMT_32_32,      FMT_32_32_FLOAT,    FMT_16_16_16_16,
+       FMT_16_16_16_16_FLOAT=32,           FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+                           FMT_1 = 37,                         FMT_GB_GR=39,
+       FMT_BG_RG,          FMT_32_AS_8,    FMT_32_AS_8_8,      FMT_5_9_9_9_SHAREDEXP,
+       FMT_8_8_8,          FMT_16_16_16,   FMT_16_16_16_FLOAT, FMT_32_32_32,
+       FMT_32_32_32_FLOAT=48,
+
+//  High level register file lengths
+    SQ_ALU_CONSTANT                                       = SQ_ALU_CONSTANT0_0,        /* 256 PS, 256 VS */
+    SQ_ALU_CONSTANT_ps_num                                = 256,
+    SQ_ALU_CONSTANT_vs_num                                = 256,
+    SQ_ALU_CONSTANT_all_num                               = 512,
+    SQ_ALU_CONSTANT_offset                                = 16,
+    SQ_ALU_CONSTANT_ps                                    = 0,
+    SQ_ALU_CONSTANT_vs                                    = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num,
+    SQ_TEX_RESOURCE                                       = SQ_TEX_RESOURCE_WORD0_0,   /* 160 PS, 160 VS, 16 FS, 160 GS */
+    SQ_TEX_RESOURCE_ps_num                                = 160,
+    SQ_TEX_RESOURCE_vs_num                                = 160,
+    SQ_TEX_RESOURCE_fs_num                                = 16,
+    SQ_TEX_RESOURCE_gs_num                                = 160,
+    SQ_TEX_RESOURCE_all_num                               = 496,
+    SQ_TEX_RESOURCE_offset                                = 28,
+    SQ_TEX_RESOURCE_ps                                    = 0,
+    SQ_TEX_RESOURCE_vs                                    = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num,
+    SQ_TEX_RESOURCE_fs                                    = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num,
+    SQ_TEX_RESOURCE_gs                                    = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num,
+    SQ_VTX_RESOURCE                                       = SQ_VTX_CONSTANT_WORD0_0,   /* 160 PS, 160 VS, 16 FS, 160 GS */
+    SQ_VTX_RESOURCE_ps_num                                = 160,
+    SQ_VTX_RESOURCE_vs_num                                = 160,
+    SQ_VTX_RESOURCE_fs_num                                = 16,
+    SQ_VTX_RESOURCE_gs_num                                = 160,
+    SQ_VTX_RESOURCE_all_num                               = 496,
+    SQ_VTX_RESOURCE_offset                                = 28,
+    SQ_VTX_RESOURCE_ps                                    = 0,
+    SQ_VTX_RESOURCE_vs                                    = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num,
+    SQ_VTX_RESOURCE_fs                                    = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num,
+    SQ_VTX_RESOURCE_gs                                    = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num,
+    SQ_TEX_SAMPLER_WORD                                   = SQ_TEX_SAMPLER_WORD0_0,    /* 18 per PS, VS, GS */
+    SQ_TEX_SAMPLER_WORD_ps_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_vs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_gs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_all_num                           = 54,
+    SQ_TEX_SAMPLER_WORD_offset                            = 12,
+    SQ_TEX_SAMPLER_WORD_ps                                = 0,
+    SQ_TEX_SAMPLER_WORD_vs                                = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num,
+    SQ_TEX_SAMPLER_WORD_gs                                = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num,
+    SQ_LOOP_CONST                                         = SQ_LOOP_CONST_0,           /* 32 per PS, VS, GS */
+    SQ_LOOP_CONST_ps_num                                  = 32,
+    SQ_LOOP_CONST_vs_num                                  = 32,
+    SQ_LOOP_CONST_gs_num                                  = 32,
+    SQ_LOOP_CONST_all_num                                 = 96,
+    SQ_LOOP_CONST_offset                                  = 4,
+    SQ_LOOP_CONST_ps                                      = 0,
+    SQ_LOOP_CONST_vs                                      = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num,
+    SQ_LOOP_CONST_gs                                      = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num,
+} ;
+
+
+#endif
diff --git a/src/mesa/drivers/dri/r600/r600_reg_r7xx.h b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h
new file mode 100644 (file)
index 0000000..e5c01c8
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009  Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R7xx_H_
+#define _R600_REG_R7xx_H_
+
+/*
+ * Register update for R7xx chips
+ */
+
+/*
+ * Layout of this table (by indentation):
+ *   - outermost entries are register offsets,
+ *   - entries indented below a register are its bit / mask / shift
+ *     definitions,
+ *   - entries indented further are enumerated values for the field above.
+ *
+ * Lines commented out with "//" name registers or fields that this enum
+ * does not define itself; they only serve as headings for the R7xx
+ * specific additions listed beneath them (presumably they are defined in
+ * the R6xx register header -- TODO confirm).
+ *
+ * Where a field differs between generations both variants are listed with
+ * an explicit R6xx_ / R7xx_ prefix (e.g. OMOD shift 6 vs. 5).
+ */
+enum {
+
+    R7XX_MC_VM_FB_LOCATION                                = 0x00002024,
+
+//  GRBM_STATUS                                           = 0x00008010,
+       R7XX_TA_BUSY_bit                                  = 1 << 14,
+
+    R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ                     = 0x00008d8c,
+       RING0_OFFSET_mask                                 = 0xff << 0,
+       RING0_OFFSET_shift                                = 0,
+       ISOLATE_ES_ENABLE_bit                             = 1 << 12,
+       ISOLATE_GS_ENABLE_bit                             = 1 << 13,
+       VS_PC_LIMIT_ENABLE_bit                            = 1 << 14,
+
+//  SQ_ALU_WORD0                                          = 0x00008dfc,
+//     SRC0_SEL_mask                                     = 0x1ff << 0,
+//     SRC1_SEL_mask                                     = 0x1ff << 13,
+           R7xx_SQ_ALU_SRC_1_DBL_L                       = 0xf4,
+           R7xx_SQ_ALU_SRC_1_DBL_M                       = 0xf5,
+           R7xx_SQ_ALU_SRC_0_5_DBL_L                     = 0xf6,
+           R7xx_SQ_ALU_SRC_0_5_DBL_M                     = 0xf7,
+//     INDEX_MODE_mask                                   = 0x07 << 26,
+           R7xx_SQ_INDEX_GLOBAL                          = 0x05,
+           R7xx_SQ_INDEX_GLOBAL_AR_X                     = 0x06,
+    R6xx_SQ_ALU_WORD1_OP2                                 = 0x00008dfc,
+    R7xx_SQ_ALU_WORD1_OP2_V2                              = 0x00008dfc,
+       R6xx_FOG_MERGE_bit                                = 1 << 5,
+       R6xx_OMOD_mask                                    = 0x03 << 6,
+       R7xx_OMOD_mask                                    = 0x03 << 5,
+       R6xx_OMOD_shift                                   = 6,
+       R7xx_OMOD_shift                                   = 5,
+       R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask              = 0x3ff << 8,
+       R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask           = 0x7ff << 7,
+       R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift             = 8,
+       R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift          = 7,
+           R7xx_SQ_OP2_INST_FREXP_64                     = 0x07,
+           R7xx_SQ_OP2_INST_ADD_64                       = 0x17,
+           R7xx_SQ_OP2_INST_MUL_64                       = 0x1b,
+           R7xx_SQ_OP2_INST_FLT64_TO_FLT32               = 0x1c,
+           R7xx_SQ_OP2_INST_FLT32_TO_FLT64               = 0x1d,
+           R7xx_SQ_OP2_INST_LDEXP_64                     = 0x7a,
+           R7xx_SQ_OP2_INST_FRACT_64                     = 0x7b,
+           R7xx_SQ_OP2_INST_PRED_SETGT_64                = 0x7c,
+           R7xx_SQ_OP2_INST_PRED_SETE_64                 = 0x7d,
+           R7xx_SQ_OP2_INST_PRED_SETGE_64                = 0x7e,
+//  SQ_ALU_WORD1_OP3                                      = 0x00008dfc,
+//     SRC2_SEL_mask                                     = 0x1ff << 0,
+//         R7xx_SQ_ALU_SRC_1_DBL_L                       = 0xf4,
+//         R7xx_SQ_ALU_SRC_1_DBL_M                       = 0xf5,
+//         R7xx_SQ_ALU_SRC_0_5_DBL_L                     = 0xf6,
+//         R7xx_SQ_ALU_SRC_0_5_DBL_M                     = 0xf7,
+//     SQ_ALU_WORD1_OP3__ALU_INST_mask                   = 0x1f << 13,
+           R7xx_SQ_OP3_INST_MULADD_64                    = 0x08,
+           R7xx_SQ_OP3_INST_MULADD_64_M2                 = 0x09,
+           R7xx_SQ_OP3_INST_MULADD_64_M4                 = 0x0a,
+           R7xx_SQ_OP3_INST_MULADD_64_D2                 = 0x0b,
+//  SQ_CF_ALU_WORD1                                       = 0x00008dfc,
+       R6xx_USES_WATERFALL_bit                           = 1 << 25,
+       R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit               = 1 << 25,
+//  SQ_CF_ALLOC_EXPORT_WORD0                              = 0x00008dfc,
+//     ARRAY_BASE_mask                                   = 0x1fff << 0,
+//     TYPE_mask                                         = 0x03 << 13,
+//         SQ_EXPORT_PARAM                               = 0x02,
+//         X_UNUSED_FOR_SX_EXPORTS                       = 0x03,
+//     ELEM_SIZE_mask                                    = 0x03 << 30,
+//  SQ_CF_ALLOC_EXPORT_WORD1                              = 0x00008dfc,
+//     SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask            = 0x7f << 23,
+           R7xx_SQ_CF_INST_MEM_EXPORT                    = 0x3a,
+//  SQ_CF_WORD1                                           = 0x00008dfc,
+//     SQ_CF_WORD1__COUNT_mask                           = 0x07 << 10,
+       R7xx_COUNT_3_bit                                  = 1 << 19,
+//     SQ_CF_WORD1__CF_INST_mask                         = 0x7f << 23,
+           R7xx_SQ_CF_INST_END_PROGRAM                   = 0x19,
+           R7xx_SQ_CF_INST_WAIT_ACK                      = 0x1a,
+           R7xx_SQ_CF_INST_TEX_ACK                       = 0x1b,
+           R7xx_SQ_CF_INST_VTX_ACK                       = 0x1c,
+           R7xx_SQ_CF_INST_VTX_TC_ACK                    = 0x1d,
+//  SQ_VTX_WORD0                                          = 0x00008dfc,
+//     VTX_INST_mask                                     = 0x1f << 0,
+           R7xx_SQ_VTX_INST_MEM                          = 0x02,
+//  SQ_VTX_WORD2                                          = 0x00008dfc,
+       R7xx_SQ_VTX_WORD2__ALT_CONST_bit                  = 1 << 20,
+
+//  SQ_TEX_WORD0                                          = 0x00008dfc,
+//     TEX_INST_mask                                     = 0x1f << 0,
+           R7xx_X_MEMORY_READ                            = 0x02,
+           R7xx_SQ_TEX_INST_KEEP_GRADIENTS               = 0x0a,
+           R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1    = 0x0f,
+       R7xx_SQ_TEX_WORD0__ALT_CONST_bit                  = 1 << 24,
+
+    R7xx_PA_SC_EDGERULE                                   = 0x00028230,
+    R7xx_SPI_THREAD_GROUPING                              = 0x000286c8,
+       PS_GROUPING_mask                                  = 0x1f << 0,
+       PS_GROUPING_shift                                 = 0,
+       VS_GROUPING_mask                                  = 0x1f << 8,
+       VS_GROUPING_shift                                 = 8,
+       GS_GROUPING_mask                                  = 0x1f << 16,
+       GS_GROUPING_shift                                 = 16,
+       ES_GROUPING_mask                                  = 0x1f << 24,
+       ES_GROUPING_shift                                 = 24,
+    R7xx_CB_SHADER_CONTROL                                = 0x000287a0,
+       RT0_ENABLE_bit                                    = 1 << 0,
+       RT1_ENABLE_bit                                    = 1 << 1,
+       RT2_ENABLE_bit                                    = 1 << 2,
+       RT3_ENABLE_bit                                    = 1 << 3,
+       RT4_ENABLE_bit                                    = 1 << 4,
+       RT5_ENABLE_bit                                    = 1 << 5,
+       RT6_ENABLE_bit                                    = 1 << 6,
+       RT7_ENABLE_bit                                    = 1 << 7,
+//  DB_ALPHA_TO_MASK                                      = 0x00028d44,
+       R7xx_OFFSET_ROUND_bit                             = 1 << 16,
+//  SQ_TEX_SAMPLER_MISC_0                                 = 0x0003d03c,
+       R7xx_TRUNCATE_COORD_bit                           = 1 << 9,
+       R7xx_DISABLE_CUBE_WRAP_bit                        = 1 << 10,
+
+} ;
+
+#endif /* _R600_REG_R7xx_H_ */
index fdb6f78754191211fcbcf12d743bc410878b9467..4d134070d65499907e4b3d289d845244231ec293 100644 (file)
@@ -36,7 +36,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "r600_swtcl.h"
 #include "r600_emit.h"
-#include "r600_tex.h"
 
 #define EMIT_ATTR( ATTR, STYLE )                                       \
 do {                                                                   \
@@ -65,104 +64,12 @@ do { \
 
 static void r600SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
 {
-       r600ContextPtr rmesa = R600_CONTEXT( ctx );
-       TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct vertex_buffer *VB = &tnl->vb;
-       struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs;
-       int vte = 0;
-       int i, j, reg_count;
-       uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1];
-       uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1];
-
-       for (i = 0; i < R600_VIR_CMDSIZE-1; ++i)
-               vir0[i] = vir1[i] = 0;
-
-       for (i = 0, j = 0; i < rmesa->radeon.swtcl.vertex_attr_count; ++i) {
-               int tmp, data_format;
-               switch (attrs[i].format) {
-                       case EMIT_1F:
-                               data_format = R600_DATA_TYPE_FLOAT_1;
-                               break;
-                       case EMIT_2F:
-                               data_format = R600_DATA_TYPE_FLOAT_2;
-                               break;
-                       case EMIT_3F:
-                               data_format = R600_DATA_TYPE_FLOAT_3;
-                               break;
-                       case EMIT_4F:
-                               data_format = R600_DATA_TYPE_FLOAT_4;
-                               break;
-                       case EMIT_4UB_4F_RGBA:
-                       case EMIT_4UB_4F_ABGR:
-                               data_format = R600_DATA_TYPE_BYTE | R600_NORMALIZE;
-                               break;
-                       default:
-                               fprintf(stderr, "%s: Invalid data format type", __FUNCTION__);
-                               _mesa_exit(-1);
-                               break;
-               }
-
-               tmp = data_format | (attrs[i].dst_loc << R600_DST_VEC_LOC_SHIFT);
-               if (i % 2 == 0) {
-                       vir0[j] = tmp << R600_DATA_TYPE_0_SHIFT;
-                       vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R600_WRITE_ENA_SHIFT);
-               } else {
-                       vir0[j] |= tmp << R600_DATA_TYPE_1_SHIFT;
-                       vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R600_WRITE_ENA_SHIFT)) << R600_SWIZZLE1_SHIFT;
-                       ++j;
-               }
-       }
-
-       reg_count = (rmesa->radeon.swtcl.vertex_attr_count + 1) >> 1;
-       if (rmesa->radeon.swtcl.vertex_attr_count % 2 != 0) {
-               vir0[reg_count-1] |= R600_LAST_VEC << R600_DATA_TYPE_0_SHIFT;
-       } else {
-               vir0[reg_count-1] |= R600_LAST_VEC << R600_DATA_TYPE_1_SHIFT;
-       }
-
-       R600_STATECHANGE(rmesa, vir[0]);
-       R600_STATECHANGE(rmesa, vir[1]);
-       R600_STATECHANGE(rmesa, vof);
-       R600_STATECHANGE(rmesa, vte);
-       R600_STATECHANGE(rmesa, vic);
-
-       if (rmesa->radeon.radeonScreen->kernel_mm) {
-               rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
-               rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
-               rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16;
-               rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16;
-       } else {
-               ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count;
-               ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count;
-       }
-
-       rmesa->hw.vic.cmd[R600_VIC_CNTL_0] = r600VAPInputCntl0(ctx, InputsRead);
-       rmesa->hw.vic.cmd[R600_VIC_CNTL_1] = r600VAPInputCntl1(ctx, InputsRead);
-       rmesa->hw.vof.cmd[R600_VOF_CNTL_0] = r600VAPOutputCntl0(ctx, OutputsWritten);
-       rmesa->hw.vof.cmd[R600_VOF_CNTL_1] = r600VAPOutputCntl1(ctx, OutputsWritten);
-
-       vte = rmesa->hw.vte.cmd[1];
-       vte &= ~(R600_VTX_XY_FMT | R600_VTX_Z_FMT | R600_VTX_W0_FMT);
-       /* Important:
-        */
-       if ( VB->NdcPtr != NULL ) {
-               VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
-               vte |= R600_VTX_XY_FMT | R600_VTX_Z_FMT;
-       }
-       else {
-               VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
-               vte |= R600_VTX_W0_FMT;
-       }
-
-       assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
-
-       rmesa->hw.vte.cmd[1] = vte;
-       rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size;
 }
 
 
 static void r600SetVertexFormat( GLcontext *ctx )
 {
+#if 0 /* to be enabled */
        r600ContextPtr rmesa = R600_CONTEXT( ctx );
        TNLcontext *tnl = TNL_CONTEXT(ctx);
        struct vertex_buffer *VB = &tnl->vb;
@@ -318,6 +225,7 @@ static void r600SetVertexFormat( GLcontext *ctx )
        rmesa->radeon.swtcl.vertex_size /= 4;
 
        RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, tnl->render_inputs_bitset);
+#endif /* to be enabled */
 }
 
 
@@ -584,6 +492,7 @@ static void r600ChooseRenderState( GLcontext *ctx )
 
 static void r600RenderStart(GLcontext *ctx)
 {
+#if 0 /* to be enabled */
        r600ContextPtr rmesa = R600_CONTEXT( ctx );
 
        r600ChooseRenderState(ctx);
@@ -600,6 +509,7 @@ static void r600RenderStart(GLcontext *ctx)
        if (rmesa->radeon.dma.flush != NULL) {
                rmesa->radeon.dma.flush(ctx);
        }
+#endif /* to be enabled */
 }
 
 static void r600RenderFinish(GLcontext *ctx)
@@ -608,12 +518,14 @@ static void r600RenderFinish(GLcontext *ctx)
 
 static void r600RasterPrimitive( GLcontext *ctx, GLuint hwprim )
 {
+#if 0 /* to be enabled */
        r600ContextPtr rmesa = R600_CONTEXT(ctx);
 
        if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
                R600_NEWPRIM( rmesa );
                rmesa->radeon.swtcl.hw_primitive = hwprim;
        }
+#endif /* to be enabled */
 }
 
 static void r600RenderPrimitive(GLcontext *ctx, GLenum prim)
@@ -673,6 +585,7 @@ void r600DestroySwtcl(GLcontext *ctx)
 
 static void r600EmitVertexAOS(r600ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
 {
+#if 0 /* to be enabled */
        BATCH_LOCALS(&rmesa->radeon);
 
        if (RADEON_DEBUG & DEBUG_VERTS)
@@ -685,10 +598,12 @@ static void r600EmitVertexAOS(r600ContextPtr rmesa, GLuint vertex_size, struct r
        OUT_BATCH(vertex_size | (vertex_size << 8));
        OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
        END_BATCH();
+#endif /* to be enabled */
 }
 
 static void r600EmitVbufPrim(r600ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
 {
+#if 0 /* to be enabled */
        BATCH_LOCALS(&rmesa->radeon);
        int type, num_verts;
 
@@ -699,10 +614,12 @@ static void r600EmitVbufPrim(r600ContextPtr rmesa, GLuint primitive, GLuint vert
        OUT_BATCH_PACKET3(R600_PACKET3_3D_DRAW_VBUF_2, 0);
        OUT_BATCH(R600_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
        END_BATCH();
+#endif /* to be enabled */
 }
 
 void r600_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
 {
+#if 0 /* to be enabled */
        r600ContextPtr rmesa = R600_CONTEXT(ctx);
 
        rcommonEnsureCmdBufSpace(&rmesa->radeon,
@@ -719,4 +636,5 @@ void r600_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
                   rmesa->radeon.swtcl.numverts);
        r600EmitCacheFlush(rmesa);
        COMMIT_BATCH();
+#endif /* to be enabled */
 }
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
new file mode 100644 (file)
index 0000000..5ceb3fd
--- /dev/null
@@ -0,0 +1,4068 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/mtypes.h"
+#include "main/imports.h"
+
+#include "r600_context.h"
+#include "r700_chip.h"
+#include "r700_debug.h"
+
+#include "r700_assembler.h"
+
+/*
+ * Return the destination address mode of *pPVSDST, rebuilt from its two
+ * fields: addrmode0 supplies bit 0 and addrmode1 supplies bit 1.
+ */
+BITS addrmode_PVSDST(PVSDST * pPVSDST)
+{
+    BITS mode = (BITS)pPVSDST->addrmode1 << 1;
+
+    mode |= pPVSDST->addrmode0;
+    return mode;
+}
+
+/*
+ * Store a 2-bit address mode into *pPVSDST: bit 0 of addrmode goes to
+ * addrmode0 and bit 1 goes to addrmode1.  Higher bits are discarded.
+ */
+void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
+{
+    const BITS lowBit  = addrmode & 1;
+    const BITS highBit = (addrmode >> 1) & 1;
+
+    pPVSDST->addrmode0 = lowBit;
+    pPVSDST->addrmode1 = highBit;
+}
+
+/*
+ * Enable writes to all four destination components (x, y, z and w).
+ */
+void nomask_PVSDST(PVSDST * pPVSDST)
+{
+    pPVSDST->writew = 1;
+    pPVSDST->writez = 1;
+    pPVSDST->writey = 1;
+    pPVSDST->writex = 1;
+}
+
+/*
+ * Return the source address mode of *pPVSSRC, rebuilt from its two
+ * fields: addrmode0 supplies bit 0 and addrmode1 supplies bit 1.
+ */
+BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
+{
+    BITS mode = (BITS)pPVSSRC->addrmode1 << 1;
+
+    mode |= pPVSSRC->addrmode0;
+    return mode;
+}
+
+/*
+ * Store a 2-bit address mode into *pPVSSRC: bit 0 of addrmode goes to
+ * addrmode0 and bit 1 goes to addrmode1.  Higher bits are discarded.
+ */
+void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
+{
+    const BITS lowBit  = addrmode & 1;
+    const BITS highBit = (addrmode >> 1) & 1;
+
+    pPVSSRC->addrmode0 = lowBit;
+    pPVSSRC->addrmode1 = highBit;
+}
+
+
+/*
+ * Broadcast one swizzle selector to all four source components.
+ *
+ * The value is stored into swizzlew first and then copied component to
+ * component (w -> z -> y -> x), so every field receives exactly the value
+ * that the previous field ended up holding.
+ */
+void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
+{
+    pPVSSRC->swizzlew = swz;
+    pPVSSRC->swizzlez = pPVSSRC->swizzlew;
+    pPVSSRC->swizzley = pPVSSRC->swizzlez;
+    pPVSSRC->swizzlex = pPVSSRC->swizzley;
+}
+
+/*
+ * Reset the source swizzle to the identity mapping: every component
+ * selects itself (x <- X, y <- Y, z <- Z, w <- W).
+ */
+void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
+{
+    pPVSSRC->swizzlew = SQ_SEL_W;
+    pPVSSRC->swizzlez = SQ_SEL_Z;
+    pPVSSRC->swizzley = SQ_SEL_Y;
+    pPVSSRC->swizzlex = SQ_SEL_X;
+}
+
+/*
+ * Translate one selector through the swizzle currently stored in *pPVSSRC.
+ * SQ_SEL_X/Y/Z/W are replaced by the selector held in the matching
+ * component; any other selector value is returned unchanged.
+ */
+static BITS remap_selector_PVSSRC(const PVSSRC * pPVSSRC, BITS sel)
+{
+    switch (sel)
+    {
+        case SQ_SEL_X: return pPVSSRC->swizzlex;
+        case SQ_SEL_Y: return pPVSSRC->swizzley;
+        case SQ_SEL_Z: return pPVSSRC->swizzlez;
+        case SQ_SEL_W: return pPVSSRC->swizzlew;
+        default:       return sel;
+    }
+}
+
+/*
+ * Apply a second swizzle (x, y, z, w) on top of the swizzle already stored
+ * in *pPVSSRC, i.e. compose the two.
+ *
+ * All four new selectors are resolved against the *old* swizzle before any
+ * component is overwritten.
+ */
+void
+swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
+{
+    const BITS newX = remap_selector_PVSSRC(pPVSSRC, x);
+    const BITS newY = remap_selector_PVSSRC(pPVSSRC, y);
+    const BITS newZ = remap_selector_PVSSRC(pPVSSRC, z);
+    const BITS newW = remap_selector_PVSSRC(pPVSSRC, w);
+
+    pPVSSRC->swizzlex = newX;
+    pPVSSRC->swizzley = newY;
+    pPVSSRC->swizzlez = newZ;
+    pPVSSRC->swizzlew = newW;
+}
+
+/*
+ * Set the negate flag on all four source components.
+ */
+void neg_PVSSRC(PVSSRC* pPVSSRC)
+{
+    pPVSSRC->negw = 1;
+    pPVSSRC->negz = 1;
+    pPVSSRC->negy = 1;
+    pPVSSRC->negx = 1;
+}
+
+/*
+ * Clear the negate flag on all four source components.
+ */
+void noneg_PVSSRC(PVSSRC* pPVSSRC)
+{
+    pPVSSRC->negw = 0;
+    pPVSSRC->negz = 0;
+    pPVSSRC->negy = 0;
+    pPVSSRC->negx = 0;
+}
+
+/*
+ * Invert the negate flag of every source component, i.e. negate the whole
+ * argument (used e.g. to express SUB as ADD with a negated operand).
+ */
+void flipneg_PVSSRC(PVSSRC* pPVSSRC)
+{
+    pPVSSRC->negx = pPVSSRC->negx ? 0 : 1;
+    pPVSSRC->negy = pPVSSRC->negy ? 0 : 1;
+    pPVSSRC->negz = pPVSSRC->negz ? 0 : 1;
+    pPVSSRC->negw = pPVSSRC->negw ? 0 : 1;
+}
+
+/*
+ * Force source component c (0 = x, 1 = y, 2 = z, 3 = w) to select
+ * SQ_SEL_0 and clear its negate flag.  Any other index is ignored.
+ */
+void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
+{
+    if (c == 0)
+    {
+        pPVSSRC->swizzlex = SQ_SEL_0;
+        pPVSSRC->negx     = 0;
+    }
+    else if (c == 1)
+    {
+        pPVSSRC->swizzley = SQ_SEL_0;
+        pPVSSRC->negy     = 0;
+    }
+    else if (c == 2)
+    {
+        pPVSSRC->swizzlez = SQ_SEL_0;
+        pPVSSRC->negz     = 0;
+    }
+    else if (c == 3)
+    {
+        pPVSSRC->swizzlew = SQ_SEL_0;
+        pPVSSRC->negw     = 0;
+    }
+}
+
+/*
+ * Force source component c (0 = x, 1 = y, 2 = z, 3 = w) to select
+ * SQ_SEL_1 and clear its negate flag.  Any other index is ignored.
+ */
+void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
+{
+    if (c == 0)
+    {
+        pPVSSRC->swizzlex = SQ_SEL_1;
+        pPVSSRC->negx     = 0;
+    }
+    else if (c == 1)
+    {
+        pPVSSRC->swizzley = SQ_SEL_1;
+        pPVSSRC->negy     = 0;
+    }
+    else if (c == 2)
+    {
+        pPVSSRC->swizzlez = SQ_SEL_1;
+        pPVSSRC->negz     = 0;
+    }
+    else if (c == 3)
+    {
+        pPVSSRC->swizzlew = SQ_SEL_1;
+        pPVSSRC->negw     = 0;
+    }
+}
+
+/*
+ * Return the bitwise OR of the point_size, edge_flag, rta_index, kill_flag
+ * and viewport_index fields of *pOutVTXFmt0; the result is non-zero when
+ * any of them is set.
+ */
+BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
+{
+    const BITS anyMisc = pOutVTXFmt0->point_size
+                       | pOutVTXFmt0->edge_flag
+                       | pOutVTXFmt0->rta_index
+                       | pOutVTXFmt0->kill_flag
+                       | pOutVTXFmt0->viewport_index;
+
+    return anyMisc;
+}
+
+/*
+ * Return the bitwise OR of the depth, stencil_ref, mask and
+ * coverage_to_mask fields of *pFPOutFmt; the result is non-zero when any
+ * of them is set.
+ */
+BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
+{
+    const BITS anyDepth = pFPOutFmt->depth
+                        | pFPOutFmt->stencil_ref
+                        | pFPOutFmt->mask
+                        | pFPOutFmt->coverage_to_mask;
+
+    return anyDepth;
+}
+
+/*
+ * Tell whether the instruction described by *dest is a reduction.
+ *
+ * Only two-operand instructions (op3 == 0) whose opcode is
+ * SQ_OP2_INST_DOT4 or SQ_OP2_INST_DOT4_IEEE qualify; everything else
+ * yields GL_FALSE.
+ */
+GLboolean is_reduction_opcode(PVSDWORD* dest)
+{
+    if (dest->dst.op3 != 0)
+    {
+        return GL_FALSE;
+    }
+
+    switch (dest->dst.opcode)
+    {
+        case SQ_OP2_INST_DOT4:
+        case SQ_OP2_INST_DOT4_IEEE:
+            return GL_TRUE;
+        default:
+            return GL_FALSE;
+    }
+}
+
+/*
+ * Map a GL component type plus channel count to a hardware surface format.
+ *
+ * eType         GL data type of one channel (GL_BYTE ... GL_DOUBLE).
+ * nChannels     number of channels per element; only 1..4 yield a format.
+ * pClient_size  optional (may be NULL); receives the client-side element
+ *               size in bytes, i.e. bytes-per-channel * nChannels.  For an
+ *               unrecognised eType the per-channel size is 0.
+ *
+ * Returns the matching FMT_* value, or FMT_INVALID when eType is not
+ * recognised or nChannels is outside 1..4.
+ *
+ * NOTE(review): GL_DOUBLE reports 8 bytes per channel but is mapped to the
+ * 32-bit float formats -- looks like a placeholder, confirm intent.
+ */
+GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
+{
+    /* Formats indexed by (nChannels - 1). */
+    const GLuint fmt8[4]     = { FMT_8, FMT_8_8, FMT_8_8_8, FMT_8_8_8_8 };
+    const GLuint fmt16[4]    = { FMT_16, FMT_16_16, FMT_16_16_16, FMT_16_16_16_16 };
+    const GLuint fmt32[4]    = { FMT_32, FMT_32_32, FMT_32_32_32, FMT_32_32_32_32 };
+    const GLuint fmtFloat[4] = { FMT_32_FLOAT, FMT_32_32_FLOAT,
+                                 FMT_32_32_32_FLOAT, FMT_32_32_32_32_FLOAT };
+
+    const GLuint *pFormats      = NULL;
+    GLuint        bytesPerChan  = 0;
+    GLuint        result        = FMT_INVALID;
+
+    switch (eType)
+    {
+        case GL_BYTE:
+        case GL_UNSIGNED_BYTE:
+            bytesPerChan = 1;
+            pFormats     = fmt8;
+            break;
+
+        case GL_UNSIGNED_SHORT:
+        case GL_SHORT:
+            bytesPerChan = 2;
+            pFormats     = fmt16;
+            break;
+
+        case GL_UNSIGNED_INT:
+        case GL_INT:
+            bytesPerChan = 4;
+            pFormats     = fmt32;
+            break;
+
+        case GL_FLOAT:
+            bytesPerChan = 4;
+            pFormats     = fmtFloat;
+            break;
+
+        case GL_DOUBLE:
+            bytesPerChan = 8;
+            pFormats     = fmtFloat;
+            break;
+
+        default:
+            //GL_ASSERT_NO_CASE();
+            break;
+    }
+
+    if (pFormats != NULL && nChannels >= 1 && nChannels <= 4)
+    {
+        result = pFormats[nChannels - 1];
+    }
+
+    if (pClient_size != NULL)
+    {
+        *pClient_size = bytesPerChan * nChannels;
+    }
+
+    return result;
+}
+
+/*
+ * Return how many source operands the instruction currently held in
+ * pAsm->D takes.
+ *
+ * Three-operand (op3) instructions always take 3.  For op2 instructions the
+ * count is looked up by opcode; an opcode that is not listed is reported
+ * through r700_error() and treated as taking 3 operands.
+ */
+unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
+{
+    unsigned int numOperands = 3;
+
+    if (!pAsm->D.dst.op3)
+    {
+        switch (pAsm->D.dst.opcode)
+        {
+        /* binary operations */
+        case SQ_OP2_INST_ADD:
+        case SQ_OP2_INST_MUL:
+        case SQ_OP2_INST_MAX:
+        case SQ_OP2_INST_MIN:
+        //case SQ_OP2_INST_MAX_DX10:
+        //case SQ_OP2_INST_MIN_DX10:
+        case SQ_OP2_INST_SETGT:
+        case SQ_OP2_INST_SETGE:
+        case SQ_OP2_INST_PRED_SETE:
+        case SQ_OP2_INST_PRED_SETGT:
+        case SQ_OP2_INST_PRED_SETGE:
+        case SQ_OP2_INST_PRED_SETNE:
+        case SQ_OP2_INST_DOT4:
+        case SQ_OP2_INST_DOT4_IEEE:
+            numOperands = 2;
+            break;
+
+        /* unary operations */
+        case SQ_OP2_INST_MOV:
+        case SQ_OP2_INST_FRACT:
+        case SQ_OP2_INST_FLOOR:
+        case SQ_OP2_INST_KILLGT:
+        case SQ_OP2_INST_EXP_IEEE:
+        case SQ_OP2_INST_LOG_CLAMPED:
+        case SQ_OP2_INST_LOG_IEEE:
+        case SQ_OP2_INST_RECIP_IEEE:
+        case SQ_OP2_INST_RECIPSQRT_IEEE:
+        case SQ_OP2_INST_FLT_TO_INT:
+        case SQ_OP2_INST_SIN:
+        case SQ_OP2_INST_COS:
+            numOperands = 1;
+            break;
+
+        default:
+            r700_error(TODO_ASM_NEEDIMPINST,
+                       "Need instruction operand number. \n");
+            break;
+        }
+    }
+
+    return numOperands;
+}
+
+/*
+ * Put an assembler context into its initial state and bind it to a shader.
+ *
+ * spt      shader pipe type recorded as the assembler's current type.
+ * pAsm     assembler context to reset.
+ * pShader  shader object; it is initialised with Init_R700_Shader() and
+ *          stored in pAsm->pR700Shader.
+ *
+ * Counters are zeroed, clause pointers are cleared, the export register
+ * numbers and argument substitutions are set to -1 and every "input is
+ * used" flag is set to GL_FALSE.  Always returns 0.
+ */
+int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
+{
+    GLuint idx;
+
+    /* Bind the freshly initialised shader. */
+    Init_R700_Shader(pShader);
+    pAsm->pR700Shader       = pShader;
+    pAsm->currentShaderType = spt;
+
+    /* Control flow: no clause has been created yet. */
+    pAsm->cf_current_clause_type       = CF_EMPTY_CLAUSE;
+    pAsm->cf_last_export_ptr           = NULL;
+    pAsm->cf_current_export_clause_ptr = NULL;
+    pAsm->cf_current_alu_clause_ptr    = NULL;
+    pAsm->cf_current_tex_clause_ptr    = NULL;
+    pAsm->cf_current_vtx_clause_ptr    = NULL;
+    pAsm->cf_current_cf_clause_ptr     = NULL;
+
+    /* Export bookkeeping. */
+    pAsm->number_of_colorandz_exports = 0;
+    pAsm->number_of_exports           = 0;
+    pAsm->number_of_export_opcodes    = 0;
+
+    /* Scratch destination and source operands. */
+    pAsm->D.bits = 0;
+    for (idx = 0; idx < 3; idx++)
+    {
+        pAsm->S[idx].bits = 0;
+    }
+
+    pAsm->uLastPosUpdate = 0;
+
+    /* Clear the whole fragment output format word in one store. */
+    *(BITS *) &pAsm->fp_stOutFmt0 = 0;
+
+    pAsm->uIIns                 = 0;
+    pAsm->uOIns                 = 0;
+    pAsm->number_used_registers = 0;
+    pAsm->uUsedConsts           = 256;
+
+    /* Fragment programs */
+    pAsm->uBoolConsts = 0;
+    pAsm->uIntConsts  = 0;
+    pAsm->uInsts      = 0;
+    pAsm->uConsts     = 0;
+
+    /* Flow-control stack and branch depth tracking. */
+    pAsm->FCSP             = 0;
+    pAsm->fc_stack[0].type = FC_NONE;
+    pAsm->branch_depth     = 0;
+    pAsm->max_branch_depth = 0;
+
+    /* No argument substitution is active. */
+    for (idx = 0; idx < 4; idx++)
+    {
+        pAsm->aArgSubst[idx] = (-1);
+    }
+
+    pAsm->uOutputs = 0;
+
+    /* No export register has been assigned yet. */
+    for (idx = 0; idx < NUMBER_OF_OUTPUT_COLORS; idx++)
+    {
+        pAsm->color_export_register_number[idx] = (-1);
+    }
+    pAsm->depth_export_register_number            = (-1);
+    pAsm->stencil_export_register_number          = (-1);
+    pAsm->coverage_to_mask_export_register_number = (-1);
+    pAsm->mask_export_register_number             = (-1);
+
+    pAsm->starting_export_register_number = 0;
+    pAsm->starting_vfetch_register_number = 0;
+    pAsm->starting_temp_register_number   = 0;
+    pAsm->uFirstHelpReg                   = 0;
+
+    /* No input has been referenced yet. */
+    pAsm->input_position_is_used = GL_FALSE;
+    pAsm->input_normal_is_used   = GL_FALSE;
+
+    for (idx = 0; idx < NUMBER_OF_INPUT_COLORS; idx++)
+    {
+        pAsm->input_color_is_used[idx] = GL_FALSE;
+    }
+
+    for (idx = 0; idx < NUMBER_OF_TEXTURE_UNITS; idx++)
+    {
+        pAsm->input_texture_unit_is_used[idx] = GL_FALSE;
+    }
+
+    for (idx = 0; idx < VERT_ATTRIB_MAX; idx++)
+    {
+        pAsm->vfetch_instruction_ptr_array[idx] = NULL;
+    }
+
+    pAsm->number_of_inputs = 0;
+
+    return 0;
+}
+
+/*
+ * Return GL_TRUE when Opcode is one of the texture instructions
+ * OPCODE_TEX, OPCODE_TXP or OPCODE_TXB, GL_FALSE otherwise.
+ */
+GLboolean IsTex(gl_inst_opcode Opcode)
+{
+    switch (Opcode)
+    {
+        case OPCODE_TEX:
+        case OPCODE_TXP:
+        case OPCODE_TXB:
+            return GL_TRUE;
+        default:
+            return GL_FALSE;
+    }
+}
+
+/*
+ * Return GL_TRUE when Opcode is treated as an ALU instruction, which for
+ * now means "anything that IsTex() does not classify as texture".
+ */
+GLboolean IsAlu(gl_inst_opcode Opcode)
+{
+    //TODO : more for fc and ex for higher spec.
+    return IsTex(Opcode) ? GL_FALSE : GL_TRUE;
+}
+
+/*
+ * Make new_clause_type the assembler's current control-flow clause type.
+ *
+ * Nothing happens when the requested type is already current.  Otherwise
+ * the pointer tracking the currently open clause is cleared and the new
+ * type is recorded.  For CF_EXPORT_CLAUSE a new R700ControlFlowSXClause is
+ * additionally allocated, initialised and appended to the shader's CF
+ * program.  Requesting CF_EMPTY_CLAUSE just leaves no clause open.
+ *
+ * Returns GL_TRUE on success.  Returns GL_FALSE (after reporting through
+ * r700_error) when the current or the requested clause type is unknown, or
+ * when the export clause cannot be allocated; in the latter two cases the
+ * current clause type is left at CF_EMPTY_CLAUSE.
+ */
+int check_current_clause(r700_AssemblerBase* pAsm,
+                         CF_CLAUSE_TYPE      new_clause_type)
+{
+    if (pAsm->cf_current_clause_type == new_clause_type)
+    {
+        return GL_TRUE;
+    }
+
+    /* Close the clause that is currently open. */
+    switch (pAsm->cf_current_clause_type)
+    {
+    case CF_ALU_CLAUSE:
+        pAsm->cf_current_alu_clause_ptr = NULL;
+        break;
+    case CF_VTX_CLAUSE:
+        pAsm->cf_current_vtx_clause_ptr = NULL;
+        break;
+    case CF_TEX_CLAUSE:
+        pAsm->cf_current_tex_clause_ptr = NULL;
+        break;
+    case CF_EXPORT_CLAUSE:
+        pAsm->cf_current_export_clause_ptr = NULL;
+        break;
+    case CF_OTHER_CLAUSE:
+        pAsm->cf_current_cf_clause_ptr = NULL;
+        break;
+    case CF_EMPTY_CLAUSE:
+        break;
+    default:
+        /* This switch is on the *current* type, so that is the value to
+         * report (the requested type was logged here before). */
+        r700_error(ERROR_ASM_VTX_CLAUSE,
+                   "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n",
+                   (int) pAsm->cf_current_clause_type);
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
+
+    /* Open the requested clause. */
+    switch (new_clause_type)
+    {
+    case CF_ALU_CLAUSE:
+        pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
+        break;
+    case CF_VTX_CLAUSE:
+        pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
+        break;
+    case CF_TEX_CLAUSE:
+        pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
+        break;
+    case CF_EXPORT_CLAUSE:
+        {
+            R700ControlFlowSXClause* pR700ControlFlowSXClause
+                        = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
+
+            if (pR700ControlFlowSXClause == NULL)
+            {
+                r700_error(ERROR_ASM_ALLOCEXPORTCF,
+                           "Error allocating new EXPORT CF instruction in check_current_clause. \n");
+                return GL_FALSE;
+            }
+
+            /* Add new export instruction to control flow program */
+            pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
+            Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
+            AddCFInstruction( pAsm->pR700Shader,
+                              (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
+
+            pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
+        }
+        break;
+    case CF_EMPTY_CLAUSE:
+        break;
+    case CF_OTHER_CLAUSE:
+        pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
+        break;
+    default:
+        r700_error(ERROR_ASM_UNKOWNCLAUSE,
+                   "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+// Appends a vertex-fetch instruction to the shader, placing it in the current
+// VTX control-flow clause.
+//
+// A new VTX CF instruction is allocated when no clause is open or when the
+// open one has reached GetCFMaxInstructions()-1 in its count field; the vertex
+// instruction is linked to the new clause. Otherwise the open clause's count
+// is incremented.
+//
+// Returns GL_FALSE if the clause switch or the CF allocation fails, GL_TRUE
+// otherwise.
+GLboolean add_vfetch_instruction(r700_AssemblerBase*     pAsm,
+                                 R700VertexInstruction*  vertex_instruction_ptr)
+{
+    R700ControlFlowGenericClause* pClause;
+
+    if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
+    {
+        return GL_FALSE;
+    }
+
+    pClause = pAsm->cf_current_vtx_clause_ptr;
+
+    if( NULL == pClause ||
+        pClause->m_Word1.f.count >= GetCFMaxInstructions(pClause->m_ShaderInstType)-1 )
+    {
+        // Create new Vfetch control flow instruction for this new clause
+        pClause = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+        pAsm->cf_current_vtx_clause_ptr = pClause;
+
+        if (NULL == pClause)
+        {
+            r700_error(ERROR_ASM_ALLOCVTXCF, "Could not allocate a new VFetch CF instruction.");
+            return GL_FALSE;
+        }
+
+        Init_R700ControlFlowGenericClause(pClause);
+        AddCFInstruction( pAsm->pR700Shader,
+                          (R700ControlFlowInstruction *)pClause );
+
+        pClause->m_Word1.f.pop_count        = 0x0;
+        pClause->m_Word1.f.cf_const         = 0x0;
+        pClause->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+        pClause->m_Word1.f.count            = 0x0;
+        pClause->m_Word1.f.end_of_program   = 0x0;
+        pClause->m_Word1.f.valid_pixel_mode = 0x0;
+        pClause->m_Word1.f.cf_inst          = SQ_CF_INST_VTX;
+        pClause->m_Word1.f.whole_quad_mode  = 0x0;
+        pClause->m_Word1.f.barrier          = 0x1;
+
+        // Only the first instruction of a clause is linked to it.
+        LinkVertexInstruction(pClause, vertex_instruction_ptr );
+    }
+    else
+    {
+        pClause->m_Word1.f.count++;
+    }
+
+    AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
+
+    return GL_TRUE;
+}
+
+// Appends a texture instruction to the shader, placing it in the current TEX
+// control-flow clause.
+//
+// A new TEX CF instruction is allocated when no clause is open or when the
+// open one has reached GetCFMaxInstructions()-1 in its count field; otherwise
+// the open clause's count is incremented. The clause's barrier bit is raised
+// when the current IL instruction has a recorded destination dependency
+// (nDstDep > -1), and the first texture instruction of a clause is linked to
+// it in both directions.
+//
+// Returns GL_FALSE if the clause switch or the CF allocation fails, GL_TRUE
+// otherwise.
+GLboolean add_tex_instruction(r700_AssemblerBase*     pAsm,
+                              R700TextureInstruction* tex_instruction_ptr)
+{
+    R700ControlFlowGenericClause* pClause;
+
+    if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
+    {
+        return GL_FALSE;
+    }
+
+    pClause = pAsm->cf_current_tex_clause_ptr;
+
+    if ( NULL == pClause ||
+         pClause->m_Word1.f.count >= GetCFMaxInstructions(pClause->m_ShaderInstType)-1 )
+    {
+        // new tex cf instruction for this new clause
+        pClause = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+        pAsm->cf_current_tex_clause_ptr = pClause;
+
+        if (NULL == pClause)
+        {
+            r700_error(ERROR_ASM_ALLOCTEXCF, "Could not allocate a new TEX CF instruction.");
+            return GL_FALSE;
+        }
+
+        Init_R700ControlFlowGenericClause(pClause);
+        AddCFInstruction( pAsm->pR700Shader,
+                          (R700ControlFlowInstruction *)pClause );
+
+        pClause->m_Word1.f.pop_count        = 0x0;
+        pClause->m_Word1.f.cf_const         = 0x0;
+        pClause->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+        pClause->m_Word1.f.end_of_program   = 0x0;
+        pClause->m_Word1.f.valid_pixel_mode = 0x0;
+        pClause->m_Word1.f.cf_inst          = SQ_CF_INST_TEX;
+        pClause->m_Word1.f.whole_quad_mode  = 0x0;
+        // The barrier starts cleared; it is raised below when needed.
+        pClause->m_Word1.f.barrier          = 0x0;
+    }
+    else
+    {
+        pClause->m_Word1.f.count++;
+    }
+
+    // If this clause contains any TEX instruction that is dependent on a
+    // previous instruction, set the barrier bit
+    if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
+    {
+        pClause->m_Word1.f.barrier = 0x1;
+    }
+
+    // Link only the first texture instruction of the clause.
+    if(NULL == pClause->m_pLinkedTEXInstruction)
+    {
+        pClause->m_pLinkedTEXInstruction            = tex_instruction_ptr;
+        tex_instruction_ptr->m_pLinkedGenericClause = pClause;
+    }
+
+    AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
+
+    return GL_TRUE;
+}
+
+// Builds (or refreshes) the vertex-fetch instruction for one client array.
+//
+//   gl_client_id         index into pAsm->vfetch_instruction_ptr_array; also
+//                        written to the instruction's buffer_id field
+//   destination_register GPR that receives the fetched data
+//   number_of_elements   component count; components beyond it are filled
+//                        with SQ_SEL_0 (x/y/z) or SQ_SEL_1 (w)
+//   dataElementType      element type passed to GetSurfaceFormat()
+//   pFetchMethod         fetch options; mega_fetch_remainder is written on
+//                        the mega-fetch path
+//
+// If the slot for gl_client_id is empty, a new instruction is allocated, added
+// to the shader via add_vfetch_instruction() and stored in the slot; otherwise
+// the existing instruction's fields are overwritten in place.
+//
+// Returns GL_FALSE on allocation failure or when the instruction cannot be
+// added; GL_TRUE otherwise.
+GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+                                      GLuint gl_client_id,
+                                      GLuint destination_register,
+                                      GLuint number_of_elements,
+                                      GLenum dataElementType,
+                                      VTX_FETCH_METHOD* pFetchMethod)
+{
+    GLuint client_size_inbyte;
+    // Start from defined values: the mini-fetch branch below is still a TODO
+    // and would otherwise leave these indeterminate when they are encoded.
+    GLuint mega_fetch_count   = 0x0;
+    GLuint is_mega_fetch_flag = 0x0;
+
+    R700VertexGenericFetch*   vfetch_instruction_ptr;
+    R700VertexGenericFetch*   assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
+
+    if (assembled_vfetch_instruction_ptr == NULL)
+    {
+        vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
+        if (vfetch_instruction_ptr == NULL)
+        {
+            return GL_FALSE;
+        }
+        Init_R700VertexGenericFetch(vfetch_instruction_ptr);
+    }
+    else
+    {
+        vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
+    }
+
+    // Only the byte size written through the out-parameter is consumed here;
+    // the returned surface format is not used by this function.
+    (void) GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
+
+    if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
+    {
+        //TODO : mini fetch
+    }
+    else
+    {
+        mega_fetch_count = MEGA_FETCH_BYTES - 1;
+        is_mega_fetch_flag       = 0x1;
+        pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
+    }
+
+    vfetch_instruction_ptr->m_Word0.f.vtx_inst         = SQ_VTX_INST_FETCH;
+    vfetch_instruction_ptr->m_Word0.f.fetch_type       = SQ_VTX_FETCH_VERTEX_DATA;
+    vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+
+    vfetch_instruction_ptr->m_Word0.f.buffer_id        = gl_client_id;
+    vfetch_instruction_ptr->m_Word0.f.src_gpr          = 0x0;
+    vfetch_instruction_ptr->m_Word0.f.src_rel          = SQ_ABSOLUTE;
+    vfetch_instruction_ptr->m_Word0.f.src_sel_x        = SQ_SEL_X;
+    vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
+
+    // Missing components read as 0 for x/y/z and as 1 for w.
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_x        = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_y        = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_z        = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_w        = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+    vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
+
+    // Destination register
+    vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
+    vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
+
+    vfetch_instruction_ptr->m_Word2.f.offset              = 0;
+    vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
+
+    vfetch_instruction_ptr->m_Word2.f.mega_fetch          = is_mega_fetch_flag;
+
+    if (assembled_vfetch_instruction_ptr == NULL)
+    {
+        // NOTE(review): on this failure path the freshly allocated
+        // instruction is not released - confirm ownership and free it here.
+        if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
+        {
+            return GL_FALSE;
+        }
+
+        // Refuse to overwrite a slot that got populated in the meantime.
+        if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
+        {
+            return GL_FALSE;
+        }
+        else
+        {
+            pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+// Hands out the next helper register number and advances pAsm->uHelpReg.
+// pAsm->number_used_registers is raised to the new uHelpReg value when that
+// value exceeds it.
+GLuint gethelpr(r700_AssemblerBase* pAsm)
+{
+    const GLuint uAllocated = pAsm->uHelpReg++;
+
+    if (pAsm->number_used_registers < pAsm->uHelpReg)
+    {
+        pAsm->number_used_registers = pAsm->uHelpReg;
+    }
+
+    return uAllocated;
+}
+// Rewinds the helper-register cursor to pAsm->uFirstHelpReg so that
+// subsequent gethelpr() calls start handing out registers from there again.
+void resethelpr(r700_AssemblerBase* pAsm)
+{
+    pAsm->uHelpReg = pAsm->uFirstHelpReg;
+}
+
+// Prepares per-instruction operand checking: rewinds the helper registers and
+// marks the four argument-substitution slots as unused (-1).
+void checkop_init(r700_AssemblerBase* pAsm)
+{
+    int iSlot;
+
+    resethelpr(pAsm);
+
+    for (iSlot = 0; iSlot < 4; iSlot++)
+    {
+        pAsm->aArgSubst[iSlot] = -1;
+    }
+}
+
+// Emits a MOV that copies source operand `src` of the current IL instruction
+// into a freshly allocated helper register, then records that register in
+// pAsm->aArgSubst[1 + src] so assemble_src() reads the helper instead.
+//
+// Returns GL_FALSE if the source cannot be assembled or the instruction
+// cannot be emitted; GL_TRUE otherwise.
+GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
+{
+    PVSSRC *pMovSrc  = &(pAsm->S[0].src);
+    GLuint  uHelper  = gethelpr(pAsm);
+
+    // Destination: the helper GPR, all channels written.
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = uHelper;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    // Source: operand `src`, loaded into source field 0.
+    if( GL_FALSE == assemble_src(pAsm, src, 0) )
+    {
+        return GL_FALSE;
+    }
+
+    // Copy the register verbatim: discard the operand's swizzle and negate.
+    noswizzle_PVSSRC(pMovSrc);
+    noneg_PVSSRC(pMovSrc);
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->aArgSubst[1 + src] = uHelper;
+
+    return GL_TRUE;
+}
+
+// Operand check for one-source instructions: only resets the helper-register
+// and substitution state. Always returns GL_TRUE.
+GLboolean checkop1(r700_AssemblerBase* pAsm)
+{
+    checkop_init(pAsm);
+
+    return GL_TRUE;
+}
+
+// Operand check for two-source instructions.
+//
+// When both sources come from a constant-like register file (CONSTANT,
+// LOCAL_PARAM, ENV_PARAM, STATE_VAR) and their indices differ, source 1 is
+// first copied to a helper register via mov_temp().
+//
+// Returns GL_FALSE if that copy fails, GL_TRUE otherwise.
+GLboolean checkop2(r700_AssemblerBase* pAsm)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    GLboolean bAllConst = GL_TRUE;
+    int       iSrc;
+
+    checkop_init(pAsm);
+
+    for (iSrc = 0; iSrc < 2; iSrc++)
+    {
+        switch (pILInst->SrcReg[iSrc].File)
+        {
+        case PROGRAM_CONSTANT:
+        case PROGRAM_LOCAL_PARAM:
+        case PROGRAM_ENV_PARAM:
+        case PROGRAM_STATE_VAR:
+            break;
+        default:
+            bAllConst = GL_FALSE;
+            break;
+        }
+    }
+
+    if( (GL_TRUE == bAllConst) &&
+        (pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index) )
+    {
+        if( GL_FALSE == mov_temp(pAsm, 1) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+// Operand check for three-source instructions.
+//
+// Sources from a constant-like register file (CONSTANT, LOCAL_PARAM,
+// ENV_PARAM, STATE_VAR) are copied to helper registers via mov_temp() as
+// follows:
+//   - all three constant: sources 1 and 2 are copied unconditionally;
+//   - exactly sources 0 and 1 constant: source 1 is copied if the indices differ;
+//   - exactly sources 0 and 2 constant: source 2 is copied if the indices differ;
+//   - exactly sources 1 and 2 constant: source 2 is copied if the indices differ.
+//
+// Returns GL_FALSE if any copy fails, GL_TRUE otherwise.
+GLboolean checkop3(r700_AssemblerBase* pAsm)
+{
+    GLboolean bSrcConst[3];
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    int iSrc;
+
+    checkop_init(pAsm);
+
+    for (iSrc = 0; iSrc < 3; iSrc++)
+    {
+        switch (pILInst->SrcReg[iSrc].File)
+        {
+        case PROGRAM_CONSTANT:
+        case PROGRAM_LOCAL_PARAM:
+        case PROGRAM_ENV_PARAM:
+        case PROGRAM_STATE_VAR:
+            bSrcConst[iSrc] = GL_TRUE;
+            break;
+        default:
+            bSrcConst[iSrc] = GL_FALSE;
+            break;
+        }
+    }
+
+    if( (GL_TRUE == bSrcConst[0]) &&
+        (GL_TRUE == bSrcConst[1]) &&
+        (GL_TRUE == bSrcConst[2]) )
+    {
+        if( GL_FALSE == mov_temp(pAsm, 1) )
+        {
+            return GL_FALSE;
+        }
+        if( GL_FALSE == mov_temp(pAsm, 2) )
+        {
+            return GL_FALSE;
+        }
+    }
+    else if( (GL_TRUE == bSrcConst[0]) &&
+             (GL_TRUE == bSrcConst[1]) )
+    {
+        if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
+        {
+            if( GL_FALSE == mov_temp(pAsm, 1) )
+            {
+                // Was "return 1", which reads as GL_TRUE and hid the failure.
+                return GL_FALSE;
+            }
+        }
+    }
+    else if( (GL_TRUE == bSrcConst[0]) &&
+             (GL_TRUE == bSrcConst[2]) )
+    {
+        if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
+        {
+            if( GL_FALSE == mov_temp(pAsm, 2) )
+            {
+                return GL_FALSE;
+            }
+        }
+    }
+    else if( (GL_TRUE == bSrcConst[1]) &&
+             (GL_TRUE == bSrcConst[2]) )
+    {
+        if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
+        {
+            if( GL_FALSE == mov_temp(pAsm, 2) )
+            {
+                return GL_FALSE;
+            }
+        }
+    }
+
+    return GL_TRUE;
+}
+
+// Fills source field pAsm->S[fld] from operand `src` of the current IL
+// instruction.
+//
+//   src  operand index within the IL instruction
+//   fld  destination source field; -1 means "same as src"
+//
+// If a helper register was substituted for the operand (aArgSubst[1+src] >= 0)
+// that register is used; otherwise the register is derived from the operand's
+// register file. Swizzle and per-channel negate bits are always taken from the
+// IL operand.
+//
+// Returns GL_FALSE for an unsupported register file, GL_TRUE otherwise.
+GLboolean assemble_src(r700_AssemblerBase *pAsm,
+                       int src,
+                       int fld)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    PVSSRC *pOperand;
+
+    if (-1 == fld)
+    {
+        fld = src;
+    }
+    pOperand = &(pAsm->S[fld].src);
+
+    if(pAsm->aArgSubst[1+src] >= 0)
+    {
+        // Operand was redirected to a helper register.
+        setaddrmode_PVSSRC(pOperand, ADDR_ABSOLUTE);
+        pOperand->rtype = SRC_REG_TEMPORARY;
+        pOperand->reg   = pAsm->aArgSubst[1+src];
+    }
+    else if (PROGRAM_TEMPORARY == pILInst->SrcReg[src].File)
+    {
+        setaddrmode_PVSSRC(pOperand, ADDR_ABSOLUTE);
+        pOperand->rtype = SRC_REG_TEMPORARY;
+        pOperand->reg   = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
+    }
+    else if ( (PROGRAM_CONSTANT    == pILInst->SrcReg[src].File) ||
+              (PROGRAM_LOCAL_PARAM == pILInst->SrcReg[src].File) ||
+              (PROGRAM_ENV_PARAM   == pILInst->SrcReg[src].File) ||
+              (PROGRAM_STATE_VAR   == pILInst->SrcReg[src].File) )
+    {
+        // Constants may be addressed relative to A0.
+        setaddrmode_PVSSRC(pOperand,
+                           (1 == pILInst->SrcReg[src].RelAddr) ? ADDR_RELATIVE_A0
+                                                               : ADDR_ABSOLUTE);
+        pOperand->rtype = SRC_REG_CONSTANT;
+        pOperand->reg   = pILInst->SrcReg[src].Index;
+    }
+    else if (PROGRAM_INPUT == pILInst->SrcReg[src].File)
+    {
+        setaddrmode_PVSSRC(pOperand, ADDR_ABSOLUTE);
+        pOperand->rtype = SRC_REG_INPUT;
+
+        // Translate through the attribute map of the shader being built;
+        // for any other shader type the register is left untouched.
+        if (SPT_FP == pAsm->currentShaderType)
+        {
+            pOperand->reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
+        }
+        else if (SPT_VP == pAsm->currentShaderType)
+        {
+            pOperand->reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
+        }
+    }
+    else
+    {
+        r700_error(ERROR_ASM_SRCARGUMENT, "Invalid source argument type");
+        return GL_FALSE;
+    }
+
+    // Swizzle: three bits per channel, x in the lowest bits.
+    pOperand->swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
+    pOperand->swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
+    pOperand->swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
+    pOperand->swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
+
+    // Negate: one bit per channel, x in bit 0.
+    pOperand->negx = pILInst->SrcReg[src].NegateBase & 0x1;
+    pOperand->negy = (pILInst->SrcReg[src].NegateBase >> 1) & 0x1;
+    pOperand->negz = (pILInst->SrcReg[src].NegateBase >> 2) & 0x1;
+    pOperand->negw = (pILInst->SrcReg[src].NegateBase >> 3) & 0x1;
+
+    return GL_TRUE;
+}
+
+// Fills pAsm->D.dst from the destination register of the current IL
+// instruction. TEMPORARY, ADDRESS and OUTPUT register files are supported;
+// the write mask is copied bit-for-bit into the writex..writew flags.
+//
+// Returns GL_FALSE for any other register file, GL_TRUE otherwise.
+GLboolean assemble_dst(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+    if (PROGRAM_TEMPORARY == pILInst->DstReg.File)
+    {
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
+    }
+    else if (PROGRAM_ADDRESS == pILInst->DstReg.File)
+    {
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_A0;
+        pAsm->D.dst.reg   = 0;
+    }
+    else if (PROGRAM_OUTPUT == pILInst->DstReg.File)
+    {
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_OUT;
+
+        // Translate through the output map of the shader being built;
+        // for any other shader type the register is left untouched.
+        if (SPT_FP == pAsm->currentShaderType)
+        {
+            pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
+        }
+        else if (SPT_VP == pAsm->currentShaderType)
+        {
+            pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
+        }
+    }
+    else
+    {
+        r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type");
+        return GL_FALSE;
+    }
+
+    // Write mask: one bit per channel, x in bit 0.
+    pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
+    pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
+    pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
+    pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
+
+    return GL_TRUE;
+}
+
+// Fills pAsm->D.dst for a texture instruction from the destination register
+// of the current IL instruction. Only TEMPORARY and OUTPUT register files are
+// accepted; the write mask is copied into the writex..writew flags.
+//
+// Returns GL_FALSE for any other register file, GL_TRUE otherwise.
+GLboolean tex_dst(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+    switch (pILInst->DstReg.File)
+    {
+    case PROGRAM_TEMPORARY:
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
+        break;
+    case PROGRAM_OUTPUT:
+        pAsm->D.dst.rtype = DST_REG_OUT;
+
+        // Translate through the output map of the shader being built;
+        // for any other shader type the register is left untouched.
+        if (SPT_FP == pAsm->currentShaderType)
+        {
+            pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
+        }
+        else if (SPT_VP == pAsm->currentShaderType)
+        {
+            pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
+        }
+        break;
+    default:
+        r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type");
+        return GL_FALSE;
+    }
+
+    // Both accepted register files use absolute addressing.
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+
+    // Write mask: one bit per channel, x in bit 0.
+    pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
+    pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
+    pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
+    pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
+
+    return GL_TRUE;
+}
+
+// Fills pAsm->S[0] with the texture-coordinate source of the current IL
+// instruction. Accepted sources are a TEMPORARY register, or an INPUT whose
+// index is one of FRAG_ATTRIB_COL0/COL1 or FRAG_ATTRIB_TEX0..TEX7 (mapped
+// through uiFP_AttributeMap). Swizzle and negate bits are copied from the IL
+// operand.
+//
+// Returns GL_FALSE for any other source, GL_TRUE otherwise.
+GLboolean tex_src(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    PVSSRC   *pCoord         = &(pAsm->S[0].src);
+    GLboolean bValidTexCoord = GL_FALSE;
+
+    if (PROGRAM_TEMPORARY == pILInst->SrcReg[0].File)
+    {
+        bValidTexCoord = GL_TRUE;
+
+        pCoord->reg   = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number;
+        pCoord->rtype = SRC_REG_TEMPORARY;
+    }
+    else if (PROGRAM_INPUT == pILInst->SrcReg[0].File)
+    {
+        switch (pILInst->SrcReg[0].Index)
+        {
+        case FRAG_ATTRIB_COL0:
+        case FRAG_ATTRIB_COL1:
+        case FRAG_ATTRIB_TEX0:
+        case FRAG_ATTRIB_TEX1:
+        case FRAG_ATTRIB_TEX2:
+        case FRAG_ATTRIB_TEX3:
+        case FRAG_ATTRIB_TEX4:
+        case FRAG_ATTRIB_TEX5:
+        case FRAG_ATTRIB_TEX6:
+        case FRAG_ATTRIB_TEX7:
+            bValidTexCoord = GL_TRUE;
+
+            pCoord->reg   = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+            pCoord->rtype = SRC_REG_INPUT;
+            break;
+        default:
+            // Any other input attribute is not a valid texcoord source.
+            break;
+        }
+    }
+
+    if(GL_TRUE != bValidTexCoord)
+    {
+        r700_error(ERROR_ASM_BADTEXSRC, "Invalid source texcoord for TEX instruction");
+        return GL_FALSE;
+    }
+
+    setaddrmode_PVSSRC(pCoord, ADDR_ABSOLUTE);
+
+    // Swizzle: three bits per channel, x in the lowest bits.
+    pCoord->swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
+    pCoord->swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
+    pCoord->swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
+    pCoord->swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
+
+    // Negate: one bit per channel, x in bit 0.
+    pCoord->negx = pILInst->SrcReg[0].NegateBase & 0x1;
+    pCoord->negy = (pILInst->SrcReg[0].NegateBase >> 1) & 0x1;
+    pCoord->negz = (pILInst->SrcReg[0].NegateBase >> 2) & 0x1;
+    pCoord->negw = (pILInst->SrcReg[0].NegateBase >> 3) & 0x1;
+
+    return GL_TRUE;
+}
+
+// Encodes the texture instruction described by pAsm->D (destination, opcode)
+// and pAsm->S[0]/S[1] (coordinate and texture-unit sources) and adds it to the
+// shader through add_tex_instruction().
+//
+// The texture-unit source's register number is used for both resource_id and
+// sampler_id. Destination channels whose write flag is clear are encoded as
+// SQ_SEL_MASK.
+//
+// Returns GL_FALSE if the destination is neither a temporary nor an output
+// register, if allocation fails, or if the instruction cannot be added;
+// GL_TRUE otherwise.
+GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm)
+{
+    PVSSRC *   texture_coordinate_source;
+    PVSSRC *   texture_unit_source;
+    R700TextureInstruction* tex_instruction_ptr;
+
+    // Validate the destination before allocating anything, so that the
+    // rejection path cannot leak a freshly allocated instruction.
+    if ( (pAsm->D.dst.rtype != DST_REG_TEMPORARY) &&
+         (pAsm->D.dst.rtype != DST_REG_OUT) )
+    {
+        r700_error(ERROR_ASM_TEXDSTBADTYPE, "Only temp destination registers supported for TEX dest regs.");
+        return GL_FALSE;
+    }
+
+    tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
+    if (tex_instruction_ptr == NULL)
+    {
+        return GL_FALSE;
+    }
+    Init_R700TextureInstruction(tex_instruction_ptr);
+
+    texture_coordinate_source = &(pAsm->S[0].src);
+    texture_unit_source       = &(pAsm->S[1].src);
+
+    tex_instruction_ptr->m_Word0.f.tex_inst         = pAsm->D.dst.opcode;
+    tex_instruction_ptr->m_Word0.f.bc_frac_mode     = 0x0;
+    tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+
+    tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg;
+
+    tex_instruction_ptr->m_Word1.f.lod_bias     = 0x0;
+    tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
+    tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
+    tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
+    tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
+
+    tex_instruction_ptr->m_Word2.f.offset_x   = 0x0;
+    tex_instruction_ptr->m_Word2.f.offset_y   = 0x0;
+    tex_instruction_ptr->m_Word2.f.offset_z   = 0x0;
+
+    tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
+
+    // src / dst registers
+    tex_instruction_ptr->m_Word0.f.src_gpr    = texture_coordinate_source->reg;
+    tex_instruction_ptr->m_Word0.f.src_rel    = SQ_ABSOLUTE;
+
+    tex_instruction_ptr->m_Word1.f.dst_gpr    = pAsm->D.dst.reg;
+    tex_instruction_ptr->m_Word1.f.dst_rel    = SQ_ABSOLUTE;
+
+    // Channels that are not written are masked out of the result.
+    tex_instruction_ptr->m_Word1.f.dst_sel_x  = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
+    tex_instruction_ptr->m_Word1.f.dst_sel_y  = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
+    tex_instruction_ptr->m_Word1.f.dst_sel_z  = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
+    tex_instruction_ptr->m_Word1.f.dst_sel_w  = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
+
+    tex_instruction_ptr->m_Word2.f.src_sel_x  = texture_coordinate_source->swizzlex;
+    tex_instruction_ptr->m_Word2.f.src_sel_y  = texture_coordinate_source->swizzley;
+    tex_instruction_ptr->m_Word2.f.src_sel_z  = texture_coordinate_source->swizzlez;
+    tex_instruction_ptr->m_Word2.f.src_sel_w  = texture_coordinate_source->swizzlew;
+
+    // NOTE(review): if add_tex_instruction() fails the instruction is not
+    // released here - confirm ownership and free it on this path.
+    if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+// Resets the assembler's per-component bookkeeping tables: every
+// hw_gpr[cycle][component] entry and every hw_cfile_addr / hw_cfile_chan
+// entry is set to -1.
+void initialize(r700_AssemblerBase *pAsm)
+{
+    GLuint uComp;
+    GLuint uCycle;
+
+    for (uComp = 0; uComp < NUMBER_OF_COMPONENTS; uComp++)
+    {
+        for (uCycle = 0; uCycle < NUMBER_OF_CYCLES; uCycle++)
+        {
+            pAsm->hw_gpr[uCycle][uComp] = (-1);
+        }
+
+        pAsm->hw_cfile_addr[uComp] = (-1);
+        pAsm->hw_cfile_chan[uComp] = (-1);
+    }
+}
+
+// Encodes one source operand of an ALU instruction.
+//
+//   alu_instruction_ptr   instruction whose src0/src1/src2 fields are written
+//   source_index          which source to encode (0, 1 or 2)
+//   pSource               operand description (register, swizzle, negate)
+//   scalar_channel_index  channel being assembled (0..3 = x..w); selects which
+//                         swizzle and negate entry of pSource applies
+//
+// Returns GL_FALSE if the register type is not TEMP/INPUT/CONSTANT, if the
+// selected swizzle is not one of X/Y/Z/W/0/1, or if source_index is out of
+// range; GL_TRUE otherwise.
+GLboolean assemble_alu_src(R700ALUInstruction*  alu_instruction_ptr,
+                           int                  source_index,
+                           PVSSRC*              pSource,
+                           BITS                 scalar_channel_index)
+{
+    BITS src_sel;
+    BITS src_rel;
+    BITS src_chan;
+    BITS src_neg;
+
+    //--------------------------------------------------------------------------
+    // Source for operands src0, src1.
+    // Values [0,127] correspond to GPR[0..127].
+    // Values [256,511] correspond to cfile constants c[0..255].
+
+    //--------------------------------------------------------------------------
+    // Other special values are shown in the list below.
+
+    // 248     SQ_ALU_SRC_0: special constant 0.0.
+    // 249     SQ_ALU_SRC_1: special constant 1.0 float.
+
+    // 250     SQ_ALU_SRC_1_INT: special constant 1 integer.
+    // 251     SQ_ALU_SRC_M_1_INT: special constant -1 integer.
+
+    // 252     SQ_ALU_SRC_0_5: special constant 0.5 float.
+    // 253     SQ_ALU_SRC_LITERAL: literal constant.
+
+    // 254     SQ_ALU_SRC_PV: previous vector result.
+    // 255     SQ_ALU_SRC_PS: previous scalar result.
+    //--------------------------------------------------------------------------
+
+    BITS channel_swizzle;
+    switch (scalar_channel_index)
+    {
+        case 0: channel_swizzle = pSource->swizzlex; break;
+        case 1: channel_swizzle = pSource->swizzley; break;
+        case 2: channel_swizzle = pSource->swizzlez; break;
+        case 3: channel_swizzle = pSource->swizzlew; break;
+        default: channel_swizzle = SQ_SEL_MASK; break;
+    }
+
+    // Swizzles 0 and 1 select the inline constants instead of a register.
+    if(channel_swizzle == SQ_SEL_0)
+    {
+        src_sel = SQ_ALU_SRC_0;
+    }
+    else if (channel_swizzle == SQ_SEL_1)
+    {
+        src_sel = SQ_ALU_SRC_1;
+    }
+    else
+    {
+        if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
+             (pSource->rtype == SRC_REG_INPUT)
+        )
+        {
+            src_sel = pSource->reg;
+        }
+        else if (pSource->rtype == SRC_REG_CONSTANT)
+        {
+            // Constants live above the GPR range in the src_sel encoding.
+            src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
+        }
+        else
+        {
+            r700_error(ERROR_ASM_ALUSRCBADTYPE, "Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.",
+                     source_index, pSource->rtype);
+            return GL_FALSE;
+        }
+    }
+
+    if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
+    {
+        src_rel = SQ_ABSOLUTE;
+    }
+    else
+    {
+        src_rel = SQ_RELATIVE;
+    }
+
+    switch (channel_swizzle)
+    {
+        case SQ_SEL_X:
+            src_chan = SQ_CHAN_X;
+            break;
+        case SQ_SEL_Y:
+            src_chan = SQ_CHAN_Y;
+            break;
+        case SQ_SEL_Z:
+            src_chan = SQ_CHAN_Z;
+            break;
+        case SQ_SEL_W:
+            src_chan = SQ_CHAN_W;
+            break;
+        case SQ_SEL_0:
+        case SQ_SEL_1:
+            // Does not matter since src_sel controls
+            src_chan = SQ_CHAN_X;
+            break;
+        default:
+            // The format string expects the offending select value; the
+            // original call supplied no argument for its %d.
+            r700_error(ERROR_ASM_ALUSRCSELECT, "Unknown source select value (%d) in assemble_alu_src().",
+                       (int) channel_swizzle);
+            return GL_FALSE;
+    }
+
+    switch (scalar_channel_index)
+    {
+        case 0: src_neg = pSource->negx; break;
+        case 1: src_neg = pSource->negy; break;
+        case 2: src_neg = pSource->negz; break;
+        case 3: src_neg = pSource->negw; break;
+        default: src_neg = 0; break;
+    }
+
+    // Sources 0 and 1 are encoded in word 0; source 2 in the OP3 form of word 1.
+    switch (source_index)
+    {
+        case 0:
+            alu_instruction_ptr->m_Word0.f.src0_sel  = src_sel;
+            alu_instruction_ptr->m_Word0.f.src0_rel  = src_rel;
+            alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
+            alu_instruction_ptr->m_Word0.f.src0_neg  = src_neg;
+            break;
+        case 1:
+            alu_instruction_ptr->m_Word0.f.src1_sel  = src_sel;
+            alu_instruction_ptr->m_Word0.f.src1_rel  = src_rel;
+            alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
+            alu_instruction_ptr->m_Word0.f.src1_neg  = src_neg;
+            break;
+        case 2:
+            alu_instruction_ptr->m_Word1_OP3.f.src2_sel  = src_sel;
+            alu_instruction_ptr->m_Word1_OP3.f.src2_rel  = src_rel;
+            alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
+            alu_instruction_ptr->m_Word1_OP3.f.src2_neg  = src_neg;
+            break;
+        default:
+            r700_error(ERROR_ASM_ALUSRCNUMBER, "Only three sources allowed in ALU opcodes.");
+            return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Appends one ALU instruction to the shader, opening a new ALU control-flow
+ * clause first when there is no current clause or the current one cannot
+ * hold the instruction plus contiguous_slots_needed further slots.
+ *
+ * pAsm                     assembler state; cf_current_alu_clause_ptr is read
+ *                          and possibly replaced.
+ * alu_instruction_ptr      instruction to append; it is handed to
+ *                          AddALUInstruction() on success.
+ * contiguous_slots_needed  extra slots subtracted from the clause capacity
+ *                          when deciding whether to start a new clause.
+ *
+ * Returns GL_FALSE if check_current_clause() fails or a new clause cannot be
+ * allocated, GL_TRUE otherwise.
+ */
+GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
+                              R700ALUInstruction* alu_instruction_ptr,
+                              GLuint              contiguous_slots_needed)
+{
+    if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
+    {
+        return GL_FALSE;
+    }
+
+    // Start a new clause when none is open, or when the open one is (nearly) full.
+    if ( pAsm->cf_current_alu_clause_ptr == NULL ||
+         ( (pAsm->cf_current_alu_clause_ptr != NULL) && 
+           (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
+         ) ) 
+    {
+
+        // new CF instruction for this clause
+        pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
+            
+        // link the new CF instruction into the shader's CF segment
+        if(NULL != pAsm->cf_current_alu_clause_ptr) 
+        {
+            Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
+                       AddCFInstruction( pAsm->pR700Shader, 
+                              (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );            
+        }
+        else 
+        {
+            r700_error(ERROR_ASM_ALLOCALUCF, "Could not allocate a new ALU CF instruction.");
+            return GL_FALSE;
+        }
+
+        // No constant-cache banks are locked for the new clause.
+        pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
+        pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
+        pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
+
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
+
+        // count starts at 0 for the first instruction and is incremented for each
+        // later one, so it presumably encodes "instructions - 1" -- TODO confirm.
+        //cf_current_alu_clause_ptr->m_Word1.f.count           = number_of_scalar_operations - 1;
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.count           = 0x0;
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst         = SQ_CF_INST_ALU;
+
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier         = 0x1;
+    }
+    else 
+    {
+        // Existing clause has room: account for one more instruction.
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
+    }
+
+    // If this clause contains any instruction that is forward dependent on a TEX instruction, 
+    // set the whole_quad_mode for this clause
+    if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) 
+    {
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;   
+    }
+
+    // The instruction that fills the clause is forced to carry the "last" bit.
+    if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) ) 
+    {
+        alu_instruction_ptr->m_Word0.f.last = 1;
+    }
+
+    // The first instruction of a clause is cross-linked with the clause itself.
+    if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
+    {
+        pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
+        alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
+    }
+    
+    AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
+
+    return GL_TRUE;
+}
+
+/*
+ * Reads back the select, relative-addressing, channel and negate fields of
+ * source operand source_index (0, 1 or 2) from an encoded ALU instruction.
+ * Sources 0 and 1 live in m_Word0, source 2 in m_Word1_OP3.
+ * For any other source_index the four outputs are left untouched.
+ */
+void get_src_properties(R700ALUInstruction*  alu_instruction_ptr,
+                        int                  source_index,
+                        BITS*                psrc_sel,
+                        BITS*                psrc_rel,
+                        BITS*                psrc_chan,
+                        BITS*                psrc_neg)
+{
+    if (0 == source_index)
+    {
+        *psrc_sel  = alu_instruction_ptr->m_Word0.f.src0_sel;
+        *psrc_rel  = alu_instruction_ptr->m_Word0.f.src0_rel;
+        *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
+        *psrc_neg  = alu_instruction_ptr->m_Word0.f.src0_neg;
+    }
+    else if (1 == source_index)
+    {
+        *psrc_sel  = alu_instruction_ptr->m_Word0.f.src1_sel;
+        *psrc_rel  = alu_instruction_ptr->m_Word0.f.src1_rel;
+        *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
+        *psrc_neg  = alu_instruction_ptr->m_Word0.f.src1_neg;
+    }
+    else if (2 == source_index)
+    {
+        *psrc_sel  = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
+        *psrc_rel  = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
+        *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
+        *psrc_neg  = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
+    }
+}
+
+/* Returns 1 when sel lies in the range 256..511 (treated as a constant-file
+   select by the callers), 0 otherwise. */
+int is_cfile(BITS sel) 
+{
+    return (sel > 255 && sel < 512) ? 1 : 0;
+}
+
+/* Returns 1 when sel names any constant: either a constant-file select
+   (see is_cfile) or a value in SQ_ALU_SRC_0..SQ_ALU_SRC_LITERAL;
+   returns 0 otherwise. */
+int is_const(BITS sel) 
+{
+    if (is_cfile(sel))
+    {
+        return 1;
+    }
+
+    return (sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL) ? 1 : 0;
+}
+
+/* Returns 1 when sel lies in the range 0..127 (treated as a GPR select by
+   the callers), 0 otherwise. */
+int is_gpr(BITS sel) 
+{
+    return (sel >= 0 && sel < 128) ? 1 : 0;
+}
+
+/*
+ * Bank swizzle chosen by check_vector(). The index is a 3-bit key built from
+ * the source operands: bit 2 is set when src0 is a constant, bit 1 when src1
+ * is, bit 0 when src2 is. The trailing comments show that key in binary.
+ */
+const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210,  //000
+                                    SQ_ALU_VEC_120,  //001
+                                    SQ_ALU_VEC_102,  //010
+
+                                    SQ_ALU_VEC_201,  //011
+                                    SQ_ALU_VEC_012,  //100
+                                    SQ_ALU_VEC_021,  //101
+
+                                    SQ_ALU_VEC_012,  //110
+                                    SQ_ALU_VEC_012}; //111
+
+/*
+ * Bank swizzle chosen by check_scalar(). Indexed by the same 3-bit key as
+ * BANK_SWIZZLE_VEC: bit 2 = src0 is a constant, bit 1 = src1 is a constant,
+ * bit 0 = src2 is a constant. The trailing comments show that key in binary.
+ */
+const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210,  //000
+                                    SQ_ALU_SCL_122,  //001 
+                                    SQ_ALU_SCL_122,  //010
+
+                                    SQ_ALU_SCL_221,  //011
+                                    SQ_ALU_SCL_212,  //100
+                                    SQ_ALU_SCL_122,  //101
+
+                                    SQ_ALU_SCL_122,  //110
+                                    SQ_ALU_SCL_122}; //111
+
+/*
+ * Reserves one of the four constant-file read slots tracked in
+ * pAsm->hw_cfile_addr[] / pAsm->hw_cfile_chan[] for the scalar (sel, chan).
+ *
+ * A slot whose address is negative is considered free. If (sel, chan) is
+ * already reserved nothing changes; otherwise the lowest-numbered free slot
+ * is claimed.
+ *
+ * Returns GL_TRUE on success, GL_FALSE (after reporting
+ * ERROR_ASM_CONSTCHANNEL) when all four slots hold other constants.
+ */
+GLboolean reserve_cfile(r700_AssemblerBase* pAsm, 
+                        GLuint sel, 
+                        GLuint chan)
+{
+    int res_match = (-1);
+    int res_empty = (-1);
+
+    GLint res;
+
+    // Scan downwards so the lowest matching / lowest free index is the one kept.
+    for (res=3; res>=0; res--) 
+    {
+        if(pAsm->hw_cfile_addr[ res] < 0)  
+        {
+            res_empty = res;
+        }
+        else if( (pAsm->hw_cfile_addr[res] == (int)sel)
+                 &&
+                 (pAsm->hw_cfile_chan[ res ] == (int) chan) ) 
+        {
+            res_match = res;
+        }
+    }
+
+    if(res_match >= 0) 
+    {
+        // Read for this scalar component already reserved, nothing to do here.
+        ;
+    }
+    else if(res_empty >= 0) 
+    {
+        pAsm->hw_cfile_addr[ res_empty ] = sel;
+        pAsm->hw_cfile_chan[ res_empty ] = chan;
+    }
+    else 
+    {
+        // The message used to print the literal text "$sel"/"$chan"; format the
+        // actual values instead (r700_error takes printf-style arguments).
+        r700_error(ERROR_ASM_CONSTCHANNEL, "All cfile read ports are used, cannot reference C%u, channel %u.", sel, chan);
+        return GL_FALSE;
+    }
+    return GL_TRUE;
+}
+
+/*
+ * Reserves the GPR read port pAsm->hw_gpr[cycle][chan] for register sel.
+ * A negative entry is free and gets claimed; an entry already holding sel is
+ * accepted as-is; any other value is a conflict, reported with
+ * ERROR_ASM_BADGPRRESERVE and answered with GL_FALSE.
+ */
+GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
+{
+    if (pAsm->hw_gpr[cycle][chan] < 0)
+    {
+        // Port unused so far: claim it.
+        pAsm->hw_gpr[cycle][chan] = sel;
+        return GL_TRUE;
+    }
+
+    if (pAsm->hw_gpr[cycle][chan] == (int)sel)
+    {
+        // Same register already occupies the port: nothing to do.
+        return GL_TRUE;
+    }
+
+    r700_error(ERROR_ASM_BADGPRRESERVE, "Another scalar operation has already used GPR read port for given channel");
+    return GL_FALSE;
+}
+
+/*
+ * Looks up the read cycle for source operand sel (0..2) under the scalar bank
+ * swizzle swiz and stores it in *pCycle.
+ * Returns GL_TRUE on success; for an unknown swizzle value reports
+ * ERROR_ASM_BADSCALARBZ, leaves *pCycle untouched and returns GL_FALSE.
+ * sel is used directly as an index into a 3-entry table and is not range-checked.
+ */
+GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
+{
+    static const int cycles_210[3] = {2, 1, 0};
+    static const int cycles_122[3] = {1, 2, 2};
+    static const int cycles_212[3] = {2, 1, 2};
+    static const int cycles_221[3] = {2, 2, 1};
+
+    const int *cycles = NULL;
+
+    switch (swiz)
+    {
+        case SQ_ALU_SCL_210: cycles = cycles_210; break;
+        case SQ_ALU_SCL_122: cycles = cycles_122; break;
+        case SQ_ALU_SCL_212: cycles = cycles_212; break;
+        case SQ_ALU_SCL_221: cycles = cycles_221; break;
+        default:
+            r700_error(ERROR_ASM_BADSCALARBZ, "Bad Scalar bank swizzle value");
+            return GL_FALSE;
+    }
+
+    *pCycle = cycles[sel];
+    return GL_TRUE;
+}
+
+/*
+ * Looks up the read cycle for source operand sel (0..2) under the vector bank
+ * swizzle swiz and stores it in *pCycle.
+ * Returns GL_TRUE on success; for an unknown swizzle value reports
+ * ERROR_ASM_BADVECTORBZ, leaves *pCycle untouched and returns GL_FALSE.
+ * sel is used directly as an index into a 3-entry table and is not range-checked.
+ */
+GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
+{
+    static const int order_012[3] = {0, 1, 2};
+    static const int order_021[3] = {0, 2, 1};
+    static const int order_120[3] = {1, 2, 0};
+    static const int order_102[3] = {1, 0, 2};
+    static const int order_201[3] = {2, 0, 1};
+    static const int order_210[3] = {2, 1, 0};
+
+    const int *order = NULL;
+
+    switch (swiz)
+    {
+        case SQ_ALU_VEC_012: order = order_012; break;
+        case SQ_ALU_VEC_021: order = order_021; break;
+        case SQ_ALU_VEC_120: order = order_120; break;
+        case SQ_ALU_VEC_102: order = order_102; break;
+        case SQ_ALU_VEC_201: order = order_201; break;
+        case SQ_ALU_VEC_210: order = order_210; break;
+        default:
+            r700_error(ERROR_ASM_BADVECTORBZ, "Bad Vec bank swizzle value");
+            return GL_FALSE;
+    }
+
+    *pCycle = order[sel];
+    return GL_TRUE;
+}
+
+/*
+ * Picks the bank swizzle for a scalar ALU instruction and reserves the
+ * constant-file and GPR read ports its sources need.
+ *
+ * The swizzle is taken from BANK_SWIZZLE_SCL using a key built from which of
+ * the three sources are constants, and is written into the instruction.
+ *
+ * Returns GL_FALSE when a constant-file slot cannot be reserved, when the
+ * swizzle cannot be mapped to a cycle, or when a GPR read port conflicts;
+ * GL_TRUE otherwise.
+ */
+GLboolean check_scalar(r700_AssemblerBase* pAsm,
+                       R700ALUInstruction* alu_instruction_ptr)
+{
+    GLuint cycle;
+    GLuint bank_swizzle;
+    GLuint const_count = 0;
+
+    BITS sel;
+    BITS chan;
+
+    GLuint src;
+
+    // Operands beyond number_of_operands keep these zero defaults.
+    BITS src_sel [3] = {0,0,0};
+    BITS src_chan[3] = {0,0,0};
+    BITS src_rel [3] = {0,0,0};
+    BITS src_neg [3] = {0,0,0};
+
+    GLuint swizzle_key;
+
+    GLuint number_of_operands = r700GetNumOperands(pAsm);
+
+    for (src=0; src<number_of_operands; src++) 
+    {
+        get_src_properties(alu_instruction_ptr,
+                           src,
+                           &(src_sel[src]), 
+                           &(src_rel[src]), 
+                           &(src_chan[src]), 
+                           &(src_neg[src]) );
+    }
+
+    // 3-bit key: src0 constant -> 4, src1 constant -> 2, src2 constant -> 1.
+    swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + 
+                    (is_const( src_sel[1] ) ? 2 : 0) + 
+                    (is_const( src_sel[2] ) ? 1 : 0) );
+
+    alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
+
+    // First pass: count constants and reserve constant-file read slots.
+    for (src=0; src<number_of_operands; src++) 
+    {
+        sel  = src_sel [src];
+        chan = src_chan[src];
+
+        if (is_const( sel )) 
+        {
+            // Any constant, including literal and inline constants
+            const_count++;
+
+            if (is_cfile( sel )) 
+            {
+                // Propagate the failure instead of dropping it, as check_vector does.
+                if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
+                {
+                    return GL_FALSE;
+                }
+            }
+        }
+    }
+
+    // Second pass: reserve GPR read ports for register sources.
+    for (src=0; src<number_of_operands; src++) 
+    {
+        sel  = src_sel [src];
+        chan = src_chan[src];
+
+        if( is_gpr(sel) ) 
+        {
+            bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
+
+            if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
+            {
+                return GL_FALSE;
+            }
+
+            // A port is only reserved when the cycle index is below the number
+            // of constant operands; NOTE(review): hardware rationale not visible here.
+            if(cycle < const_count) 
+            {
+                if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
+                {
+                    return GL_FALSE;
+                }
+            }
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Picks the bank swizzle for a vector ALU instruction and reserves the GPR
+ * and constant-file read ports its sources need.
+ *
+ * The swizzle is taken from BANK_SWIZZLE_VEC using a key built from which of
+ * the three sources are constants, and is written into the instruction.
+ *
+ * Returns GL_FALSE when the swizzle cannot be mapped to a cycle or when a
+ * GPR / constant-file reservation fails; GL_TRUE otherwise.
+ */
+GLboolean check_vector(r700_AssemblerBase* pAsm,
+                       R700ALUInstruction* alu_instruction_ptr)
+{
+    GLuint cycle;
+    GLuint bank_swizzle;
+    GLuint const_count = 0;   // incremented below but never read in this function
+
+    GLuint src;
+
+    BITS sel;
+    BITS chan;
+    BITS rel;
+    BITS neg;
+
+    // Operands beyond number_of_operands keep these zero defaults.
+    BITS src_sel [3] = {0,0,0};
+    BITS src_chan[3] = {0,0,0};
+    BITS src_rel [3] = {0,0,0};
+    BITS src_neg [3] = {0,0,0};
+
+    GLuint swizzle_key;
+
+    GLuint number_of_operands = r700GetNumOperands(pAsm);
+
+    for (src=0; src<number_of_operands; src++) 
+    {
+        get_src_properties(alu_instruction_ptr,
+                           src,
+                           &(src_sel[src]), 
+                           &(src_rel[src]), 
+                           &(src_chan[src]), 
+                           &(src_neg[src]) );
+    }
+
+
+    // 3-bit key: src0 constant -> 4, src1 constant -> 2, src2 constant -> 1.
+    swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + 
+                           (is_const( src_sel[1] ) ? 2 : 0) + 
+                           (is_const( src_sel[2] ) ? 1 : 0) 
+                         );
+
+    alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
+
+    for (src=0; src<number_of_operands; src++) 
+    {
+        sel  = src_sel [src];
+        chan = src_chan[src];
+        rel  = src_rel [src];
+        neg  = src_neg [src];
+
+
+        bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
+
+        if( is_gpr(sel) ) 
+        {
+            if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
+            {
+                return GL_FALSE;
+            }
+
+            // src1 reading the same register and channel as src0 needs no
+            // reservation of its own, so that case is deliberately a no-op.
+            if ( (src  == 1)          && 
+                 (sel  == src_sel[0]) &&
+                 (chan == src_chan[0]) ) 
+            {        
+            }
+            else 
+            {
+                if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
+                {
+                    return GL_FALSE;
+                }
+            }
+        }
+        else if( is_const(sel) ) 
+        {                  
+            const_count++;
+
+            // Only constant-file selects occupy a cfile read slot.
+            if( is_cfile(sel) ) 
+            {        
+                if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
+                {
+                    return GL_FALSE;
+                }
+            }
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Encodes the ALU operation currently described by pAsm->D (destination and
+ * opcode) and pAsm->S[0..2] (sources) into hardware ALU instructions and
+ * appends them to the shader.
+ *
+ * When pAsm->D.dst.math is 1 a single scalar instruction is emitted;
+ * otherwise four instructions are emitted, one per destination channel.
+ *
+ * Returns GL_TRUE on success and GL_FALSE on any failure (allocation, source
+ * encoding, unsupported destination type, clause handling or read-port
+ * checks).
+ *
+ * NOTE(review): an instruction allocated here is not released on the error
+ * returns that happen before add_alu_instruction() takes it over.
+ */
+GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
+{
+    GLuint    number_of_scalar_operations;
+    GLboolean is_single_scalar_operation;
+    GLuint    scalar_channel_index;
+
+    PVSSRC * pcurrent_source;
+    int    current_source_index;
+    GLuint contiguous_slots_needed;
+
+    GLuint    uNumSrc = r700GetNumOperands(pAsm);
+    GLuint    channel_swizzle, j;
+    GLuint    chan_counter[4] = {0, 0, 0, 0};
+    PVSSRC *  pSource[3];
+    GLboolean bSplitInst = GL_FALSE;
+
+    if (1 == pAsm->D.dst.math) 
+    {
+        is_single_scalar_operation = GL_TRUE;
+        number_of_scalar_operations = 1;
+    }
+    else 
+    {
+        is_single_scalar_operation = GL_FALSE;
+        number_of_scalar_operations = 4;
+        
+        /* check read port, only very preliminary algorithm, not count in 
+           src0/1 same comp case and prev slot repeat case; also not count relative
+           addressing. TODO: improve performance. */
+        for(j=0; j<uNumSrc; j++)
+        {
+            pSource[j] = &(pAsm->S[j].src);
+        }
+        /* Count, per register channel, how many temporary/input source reads
+           the four scalar slots would issue. */
+        for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++) 
+        {
+            for(j=0; j<uNumSrc; j++) 
+            {
+                switch (scalar_channel_index) 
+                {
+                    case 0: channel_swizzle = pSource[j]->swizzlex; break;
+                    case 1: channel_swizzle = pSource[j]->swizzley; break;
+                    case 2: channel_swizzle = pSource[j]->swizzlez; break;
+                    case 3: channel_swizzle = pSource[j]->swizzlew; break;
+                    default: channel_swizzle = SQ_SEL_MASK; break;
+                }
+                if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) || 
+                     (pSource[j]->rtype == SRC_REG_INPUT))
+                     && (channel_swizzle <= SQ_SEL_W) )
+                {                    
+                    chan_counter[channel_swizzle]++;                        
+                }
+            }
+        }
+        if(   (chan_counter[SQ_SEL_X] > 3)
+           || (chan_counter[SQ_SEL_Y] > 3)
+           || (chan_counter[SQ_SEL_Z] > 3)
+           || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
+        {
+            /* Too many reads of one channel: every scalar slot is then marked
+               "last" below, splitting the operation. */
+            bSplitInst = GL_TRUE;
+        }
+    }
+
+    contiguous_slots_needed = 0;
+
+    if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) 
+    {
+        contiguous_slots_needed = 4;
+    }
+
+    initialize(pAsm);    
+
+    for (scalar_channel_index=0;
+            scalar_channel_index < number_of_scalar_operations; 
+                scalar_channel_index++) 
+    {
+        R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+        if (alu_instruction_ptr == NULL) 
+        {
+            return GL_FALSE;
+        }
+        Init_R700ALUInstruction(alu_instruction_ptr);
+        
+        //src 0
+        current_source_index = 0;
+        pcurrent_source = &(pAsm->S[0].src);
+
+        if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                         current_source_index,
+                                         pcurrent_source, 
+                                         scalar_channel_index) )     
+        {
+            return GL_FALSE;
+        }
+   
+        if (pAsm->D.dst.math == 0) 
+        {            
+            // Process source 1            
+            current_source_index = 1;
+            pcurrent_source = &(pAsm->S[current_source_index].src);
+
+            if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                             current_source_index,
+                                             pcurrent_source, 
+                                             scalar_channel_index) ) 
+            {
+                return GL_FALSE;
+            }
+        }
+
+        //other bits
+        alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+
+        if(   (is_single_scalar_operation == GL_TRUE) 
+           || (GL_TRUE == bSplitInst) )
+        {
+            alu_instruction_ptr->m_Word0.f.last = 1;
+        }
+        else 
+        {
+            // In a four-slot group only the final (w) slot carries "last".
+            alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ?  1 : 0;
+        }
+
+        alu_instruction_ptr->m_Word0.f.pred_sel                = 0x0;
+        alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x0;  
+        alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+
+        // dst
+        if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || 
+            (pAsm->D.dst.rtype == DST_REG_OUT) ) 
+        {
+            alu_instruction_ptr->m_Word1.f.dst_gpr  = pAsm->D.dst.reg;
+        }
+        else 
+        {
+            r700_error(ERROR_ASM_ALUDSTBADTYPE, "Only temp destination registers supported for ALU dest regs.");
+            return GL_FALSE;
+        }
+
+        alu_instruction_ptr->m_Word1.f.dst_rel  = SQ_ABSOLUTE;  //D.rtype
+
+        if ( is_single_scalar_operation == GL_TRUE ) 
+        {
+            // Override scalar_channel_index since only one scalar value will be written.
+            // This modifies the loop variable; the loop still ends because
+            // number_of_scalar_operations is 1 in this mode.
+            if(pAsm->D.dst.writex) 
+            {
+                scalar_channel_index = 0;
+            }
+            else if(pAsm->D.dst.writey) 
+            {
+                scalar_channel_index = 1;
+            }
+            else if(pAsm->D.dst.writez) 
+            {
+                scalar_channel_index = 2;
+            }
+            else if(pAsm->D.dst.writew) 
+            {
+                scalar_channel_index = 3;
+            }
+        }
+
+        alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
+
+        alu_instruction_ptr->m_Word1.f.clamp    = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
+
+        if (pAsm->D.dst.op3) 
+        {            
+            //op3
+
+            alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
+
+            //There's 3rd src for op3
+            current_source_index = 2;
+            pcurrent_source = &(pAsm->S[current_source_index].src);
+
+            if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                              current_source_index,
+                                              pcurrent_source, 
+                                              scalar_channel_index) ) 
+            {
+                return GL_FALSE;
+            }
+        }
+        else 
+        {
+            //op2: R6xx and later chips use different WORD1_OP2 layouts (f6 vs f).
+            if (pAsm->bR6xx)
+            {
+                alu_instruction_ptr->m_Word1_OP2.f6.alu_inst           = pAsm->D.dst.opcode;
+
+                alu_instruction_ptr->m_Word1_OP2.f6.src0_abs           = 0x0;
+                alu_instruction_ptr->m_Word1_OP2.f6.src1_abs           = 0x0;
+
+                //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
+                //alu_instruction_ptr->m_Word1_OP2.f6.update_pred         = 0x0;
+                switch (scalar_channel_index) 
+                {
+                    case 0: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; 
+                        break;
+                    case 1: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; 
+                        break;
+                    case 2: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; 
+                        break;
+                    case 3: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; 
+                        break;
+                    default: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = SQ_SEL_MASK; 
+                        break;
+                }            
+                alu_instruction_ptr->m_Word1_OP2.f6.omod               = SQ_ALU_OMOD_OFF;
+            }
+            else
+            {
+                alu_instruction_ptr->m_Word1_OP2.f.alu_inst           = pAsm->D.dst.opcode;
+
+                alu_instruction_ptr->m_Word1_OP2.f.src0_abs           = 0x0;
+                alu_instruction_ptr->m_Word1_OP2.f.src1_abs           = 0x0;
+
+                //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+                //alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x0;
+                switch (scalar_channel_index) 
+                {
+                    case 0: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; 
+                        break;
+                    case 1: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; 
+                        break;
+                    case 2: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; 
+                        break;
+                    case 3: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; 
+                        break;
+                    default: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = SQ_SEL_MASK; 
+                        break;
+                }            
+                alu_instruction_ptr->m_Word1_OP2.f.omod               = SQ_ALU_OMOD_OFF;
+            }
+        }
+
+        if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
+        {
+            return GL_FALSE;
+        }
+
+        /*
+         * Judge the type of current instruction, is it vector or scalar 
+         * instruction.
+         */        
+        if (is_single_scalar_operation) 
+        {
+            if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
+            {
+                return GL_FALSE;
+            }
+        }
+        else 
+        {
+            if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
+            {
+                /* Was "return 1", which equals GL_TRUE and hid the failure. */
+                return GL_FALSE;
+            }
+        }
+
+        /* Contiguous slots are only demanded for the first slot of the group. */
+        contiguous_slots_needed = 0;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Emits the instruction currently staged in pAsm->D / pAsm->S[] and then
+ * clears that staging state for the next one.
+ *
+ * The current IL opcode decides between the TEX and the ALU encoder. When the
+ * destination is an output register, the per-output write mask in
+ * pAsm->pucOutMask is updated as well.
+ *
+ * Returns GL_FALSE (after reporting an error) when the encoder fails,
+ * GL_TRUE otherwise.
+ */
+GLboolean next_ins(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+    if( GL_TRUE == IsTex(pILInst->Opcode) )
+    {
+        if( GL_FALSE == assemble_tex_instruction(pAsm) ) 
+        {
+            r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction");
+            return GL_FALSE;
+        }
+    }
+    else 
+    {   //ALU      
+        if( GL_FALSE == assemble_alu_instruction(pAsm) ) 
+        {
+            // NOTE(review): the ALU failure is reported under the TEX error code;
+            // confirm whether a dedicated ALU code exists.
+            r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling ALU instruction");
+            return GL_FALSE;
+        }
+    } 
+      
+    if(pAsm->D.dst.rtype == DST_REG_OUT) 
+    {
+        // pucOutMask is indexed relative to the first export register.
+        if(pAsm->D.dst.op3) 
+        {        
+            // There is no mask for OP3 instructions, so all channels are written        
+            pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
+        }
+        else 
+        {
+            // Accumulate the IL write mask of this instruction.
+            pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] 
+               |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
+        }
+    }
+    
+    //reset for next inst.
+    pAsm->D.bits    = 0;
+    pAsm->S[0].bits = 0;
+    pAsm->S[1].bits = 0;
+    pAsm->S[2].bits = 0;
+
+    return GL_TRUE;
+}
+
+/*
+ * Emits a one-source scalar math operation as two instructions:
+ *
+ *     opcode  tmp.x,    a.x
+ *     MOV     dst,      tmp.x
+ *
+ * The result is first computed into the x channel of the register returned
+ * by gethelpr(), then copied (x replicated) into the IL destination.
+ *
+ * pAsm    assembler state for the current IL instruction.
+ * opcode  hardware OP2 opcode of the math operation.
+ *
+ * Returns GL_FALSE when source/destination assembly or instruction emission
+ * fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
+{
+    BITS tmp;
+
+    checkop1(pAsm);
+
+    tmp = gethelpr(pAsm);
+
+    // opcode  tmp.x,    a.x
+    // MOV     dst,      tmp.x
+
+    pAsm->D.dst.opcode = opcode;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp;
+    pAsm->D.dst.writex = 1;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    // Now replicate result to all necessary channels in destination
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    // This is a source operand, so use the source register-type enum
+    // (the destination enum DST_REG_TEMPORARY was used here before).
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Emits ABS as MAX(src, -src): source 1 is a copy of source 0 with its
+ * negate flags flipped.
+ * Returns GL_FALSE if destination/source assembly or emission fails.
+ */
+GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
+{
+    checkop1(pAsm);
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+    if( !assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    // Second operand: same source, opposite sign.
+    pAsm->S[1].bits = pAsm->S[0].bits;
+    flipneg_PVSSRC(&(pAsm->S[1].src));
+
+    if( !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Emits a hardware ADD for the current IL instruction. When the IL opcode is
+ * OPCODE_SUB the negate flags of source 1 are flipped, turning the ADD into
+ * a subtraction.
+ * Returns GL_FALSE if operand checks, assembly or emission fail.
+ */
+GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
+{
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+    if( !assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    // SUB shares this path: a - b is emitted as a + (-b).
+    if(OPCODE_SUB == pAsm->pILInst[pAsm->uiCurInst].Opcode)
+    {
+        flipneg_PVSSRC(&(pAsm->S[1].src));
+    }
+
+    if( !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/* Reports an IL opcode that has no implementation yet (named by opcode_str)
+   and always returns GL_FALSE. */
+GLboolean assemble_BAD(char *opcode_str) 
+{
+    const GLboolean not_assembled = GL_FALSE;
+
+    r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction (%s)", opcode_str);
+
+    return not_assembled;
+}
+
+/*
+ * Emits CMP as a three-operand CNDGE instruction.
+ *
+ * OP3 instructions cannot mask channels, so when the IL write mask is not
+ * 0xF the CNDGE result is written unmasked to a helper register and a
+ * second MOV copies it to the real destination under the IL mask.
+ *
+ * Returns GL_FALSE if operand checks, assembly or emission fail.
+ */
+GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
+{
+    int tmp;
+
+    if( GL_FALSE == checkop3(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+    pAsm->D.dst.op3     = 1;  
+
+    tmp = (-1);
+
+    if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
+    {
+        //OP3 has no support for write mask
+        tmp = gethelpr(pAsm);
+
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp;
+
+        nomask_PVSDST(&(pAsm->D.dst));
+    }
+    else 
+    {
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    // The 2nd/3rd arguments of assemble_src are presumably (IL source index,
+    // hardware source slot), with -1 meaning "same slot" -- TODO confirm.
+    // IL sources 1 and 2 are swapped here.
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+              
+    if( GL_FALSE == assemble_src(pAsm, 2, 1) )  
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 1, 2) ) 
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // next_ins() cleared D and S[], so the copy below starts from a clean
+    // state and is encoded as a plain OP2 MOV.
+    if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) 
+    {
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        //tmp for source
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = tmp;
+
+        noneg_PVSSRC(&(pAsm->S[0].src));
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/* Emits COS through the shared scalar math path using SQ_OP2_INST_COS. */
+GLboolean assemble_COS(r700_AssemblerBase *pAsm)
+{
+    const GLboolean emitted = assemble_math_function(pAsm, SQ_OP2_INST_COS);
+
+    return emitted;
+}
+/*
+ * Emits the dot-product family as a hardware DOT4:
+ *   - OPCODE_DP3: component 3 of both sources is forced to zero;
+ *   - OPCODE_DPH: component 3 of source 1 is forced to one;
+ *   - any other opcode: sources are used unchanged.
+ * Returns GL_FALSE if operand checks, assembly or emission fail.
+ */
+GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
+{
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
+
+    if( !assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DP3)
+    {
+        // Three-component dot product: drop the w terms.
+        zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
+        zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
+    }
+    else if(OPCODE_DPH == pAsm->pILInst[pAsm->uiCurInst].Opcode)
+    {
+        // Homogeneous dot product: w of source 1 reads as one.
+        onecomp_PVSSRC(&(pAsm->S[1].src), 3);
+    }
+
+    if( !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * Emits DST as a hardware MUL with selected source components forced to one:
+ * components 0 and 3 of source 0, and components 0 and 2 of source 1.
+ * Returns GL_FALSE if operand checks, assembly or emission fail.
+ */
+GLboolean assemble_DST(r700_AssemblerBase *pAsm)
+{
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+    if( !assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    // Source 0: x and w read as one.
+    onecomp_PVSSRC(&(pAsm->S[0].src), 0);
+    onecomp_PVSSRC(&(pAsm->S[0].src), 3);
+
+    // Source 1: x and z read as one.
+    onecomp_PVSSRC(&(pAsm->S[1].src), 0);
+    onecomp_PVSSRC(&(pAsm->S[1].src), 2);
+
+    if( !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/* Emits EX2 through the shared scalar math path using SQ_OP2_INST_EXP_IEEE. */
+GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
+{
+    const GLboolean emitted = assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
+
+    return emitted;
+}
+/*
+ * Emits FLR as a single hardware FLOOR instruction on source 0.
+ * Returns GL_FALSE if destination/source assembly or emission fails.
+ */
+GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
+{
+    checkop1(pAsm);
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+    if( !assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/* Emits a float-to-int conversion through the shared scalar math path using
+   SQ_OP2_INST_FLT_TO_INT. */
+GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
+{
+    const GLboolean emitted = assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
+
+    return emitted;
+}
+
+/*
+ * Emits FRC as a single hardware FRACT instruction on source 0.
+ * Returns GL_FALSE if destination/source assembly or emission fails.
+ */
+GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
+{
+    checkop1(pAsm);
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+    if( !assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * KIL: one SQ_OP2_INST_KILLGT instruction that writes no destination
+ * channel.  Operand 0 selects the constant 0 on every channel; operand 1
+ * is the register named by the IL instruction's DstReg field (a temporary,
+ * or otherwise an entry of uiFP_OutputMap).  On success the shader is
+ * flagged with killIsUsed.
+ * Returns GL_FALSE if assemble_dst() or next_ins() fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
+{
+    checkop1(pAsm);
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
+
+    if (assemble_dst(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    /* Clear the whole write mask. */
+    pAsm->D.dst.writex = 0;
+    pAsm->D.dst.writey = 0;
+    pAsm->D.dst.writez = 0;
+    pAsm->D.dst.writew = 0;
+
+    /* Operand 0: register 0, all channels swizzled to SQ_SEL_0, not negated. */
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = 0;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    /* Operand 1: the register referenced by DstReg. */
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+
+    if (pAsm->pILInst[pAsm->uiCurInst].DstReg.File != PROGRAM_TEMPORARY) {
+        /* PROGRAM_OUTPUT */
+        pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
+    } else {
+        pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
+    }
+
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    noswizzle_PVSSRC(&(pAsm->S[1].src));
+
+    if (next_ins(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    pAsm->pR700Shader->killIsUsed = GL_TRUE;
+
+    return GL_TRUE;
+}
+
+/* LG2: delegates to assemble_math_function() with SQ_OP2_INST_LOG_IEEE. */
+GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
+{
+    const GLboolean ok = assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
+
+    return ok;
+}
+
+/*
+ * LRP: expanded into three instructions that go through one helper
+ * register obtained from gethelpr():
+ *   1. ADD     helper = <IL src 1> + (negated) <IL src 2>
+ *   2. MULADD  helper = helper, <IL src 0>, <IL src 2>
+ *   3. MOV     dst    = helper
+ * NOTE(review): this reading assumes the third argument of assemble_src()
+ * picks the hardware operand slot (-1 meaning "same as the IL source
+ * index") - confirm against assemble_src().
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
+{
+    BITS helper;
+
+    if (checkop3(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    helper = gethelpr(pAsm);
+
+    /* Step 1: ADD into the helper register, all channels enabled. */
+    pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = helper;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    if (   assemble_src(pAsm, 1, 0) == GL_FALSE
+        || assemble_src(pAsm, 2, 1) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    neg_PVSSRC(&(pAsm->S[1].src));
+
+    if (next_ins(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    /* Step 2: MULADD, again targeting the helper register. */
+    pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+    pAsm->D.dst.op3    = 1;
+
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = helper;
+    nomask_PVSDST(&(pAsm->D.dst));
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+
+    /* Operand 0 is the helper register written by step 1. */
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = helper;
+    noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+    if (   assemble_src(pAsm, 0, 1)  == GL_FALSE
+        || assemble_src(pAsm, 2, -1) == GL_FALSE
+        || next_ins(pAsm)            == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    /* Step 3: MOV the helper register into the real destination. */
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if (assemble_dst(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = helper;
+    noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+    if (next_ins(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * MAD: one SQ_OP3_INST_MULADD built from the three IL sources.
+ *
+ * The result is routed through a helper register, followed by a MOV into
+ * the real destination, when either
+ *   - the destination is a temporary whose index equals that of a
+ *     temporary source, or
+ *   - the destination write mask is not 0xF (OP3 has no write mask).
+ * Otherwise the MULADD writes the IL destination directly.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) 
+{
+    int helperReg = (-1);
+    int srcIdx;
+    GLboolean viaHelper = GL_FALSE;
+    struct prog_instruction *pInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+    if (checkop3(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+    pAsm->D.dst.op3    = 1;
+
+    if (pInst->DstReg.File == PROGRAM_TEMPORARY) {
+        /* TODO : more investigation on MAD src and dst using same register */
+        for (srcIdx = 0; srcIdx < 3; srcIdx++) {
+            if (   (pInst->SrcReg[srcIdx].File == PROGRAM_TEMPORARY)
+                && (pInst->SrcReg[srcIdx].Index == pInst->DstReg.Index)) {
+                viaHelper = GL_TRUE;
+                break;
+            }
+        }
+    }
+    if (pInst->DstReg.WriteMask != 0xF) {
+        /* OP3 has no support for write mask */
+        viaHelper = GL_TRUE;
+    }
+
+    if (viaHelper == GL_TRUE) {
+        helperReg = gethelpr(pAsm);
+
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = helperReg;
+
+        nomask_PVSDST(&(pAsm->D.dst));
+    } else if (assemble_dst(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    if (   assemble_src(pAsm, 0, -1) == GL_FALSE
+        || assemble_src(pAsm, 1, -1) == GL_FALSE
+        || assemble_src(pAsm, 2, -1) == GL_FALSE
+        || next_ins(pAsm)            == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    if (viaHelper == GL_TRUE) {
+        /* Copy the helper register into the real destination. */
+        if (assemble_dst(pAsm) == GL_FALSE) {
+            return GL_FALSE;
+        }
+
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = helperReg;
+
+        noneg_PVSSRC(&(pAsm->S[0].src));
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+        if (next_ins(pAsm) == GL_FALSE) {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * LIT dst, src
+ *
+ * Expanded into five hardware instructions:
+ *   1. MOV          dst.xw = 1.0
+ *   2. MAX          dst.y  = max(src.x, 0.0)
+ *   3. LOG_CLAMPED  dst.z  = log(src.y)
+ *   4. MUL_LIT      tmp.x  = MUL_LIT(src.w, dst.z, src.x)
+ *   5. EXP_IEEE     dst.z  = exp(tmp.x)
+ *
+ * The intermediate logarithm is parked in dst.z, which step 5 overwrites
+ * with the final value.  Parking it in dst.w would destroy the 1.0 written
+ * by step 1, and LIT must leave 1.0 in the w component.
+ *
+ * Each step sets the destination write mask and the source swizzles
+ * explicitly; only the register number and type produced by
+ * assemble_dst() / assemble_src() are reused.
+ * NOTE(review): the exact semantics of SQ_OP3_INST_MUL_LIT are not visible
+ * here - confirm the operand order against the ISA documentation.
+ *
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
+{
+    unsigned int dstReg;
+    unsigned int dstType;
+    unsigned int srcReg;
+    unsigned int srcType;
+    int tmp;
+
+    checkop1(pAsm);
+    tmp = gethelpr(pAsm);
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Remember the resolved registers; D and S[] are rebuilt for every step. */
+    dstReg  = pAsm->D.dst.reg;
+    dstType = pAsm->D.dst.rtype;
+    srcReg  = pAsm->S[0].src.reg;
+    srcType = pAsm->S[0].src.rtype;
+
+    /* dst.xw <- 1.0 (every source channel selects the constant 1) */
+    pAsm->D.dst.opcode   = SQ_OP2_INST_MOV;
+    pAsm->D.dst.rtype    = dstType;
+    pAsm->D.dst.reg      = dstReg;
+    pAsm->D.dst.writex   = 1;
+    pAsm->D.dst.writey   = 0;
+    pAsm->D.dst.writez   = 0;
+    pAsm->D.dst.writew   = 1;
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_1;
+    pAsm->S[0].src.swizzley = SQ_SEL_1;
+    pAsm->S[0].src.swizzlez = SQ_SEL_1;
+    pAsm->S[0].src.swizzlew = SQ_SEL_1;
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* dst.y = max(src.x, 0.0) */
+    pAsm->D.dst.opcode   = SQ_OP2_INST_MAX;
+    pAsm->D.dst.rtype    = dstType;
+    pAsm->D.dst.reg      = dstReg;
+    pAsm->D.dst.writex   = 0;
+    pAsm->D.dst.writey   = 1;
+    pAsm->D.dst.writez   = 0;
+    pAsm->D.dst.writew   = 0;
+    pAsm->S[0].src.rtype = srcType;
+    pAsm->S[0].src.reg   = srcReg;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_X;
+    pAsm->S[0].src.swizzley = SQ_SEL_X;
+    pAsm->S[0].src.swizzlez = SQ_SEL_X;
+    pAsm->S[0].src.swizzlew = SQ_SEL_X;
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = tmp;
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[1].src));
+    pAsm->S[1].src.swizzlex = SQ_SEL_0;
+    pAsm->S[1].src.swizzley = SQ_SEL_0;
+    pAsm->S[1].src.swizzlez = SQ_SEL_0;
+    pAsm->S[1].src.swizzlew = SQ_SEL_0;
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* dst.z = log(src.y); scratch value, replaced by the final step */
+    pAsm->D.dst.opcode   = SQ_OP2_INST_LOG_CLAMPED;
+    pAsm->D.dst.math     = 1;
+    pAsm->D.dst.rtype    = dstType;
+    pAsm->D.dst.reg      = dstReg;
+    pAsm->D.dst.writex   = 0;
+    pAsm->D.dst.writey   = 0;
+    pAsm->D.dst.writez   = 1;
+    pAsm->D.dst.writew   = 0;
+    pAsm->S[0].src.rtype = srcType;
+    pAsm->S[0].src.reg   = srcReg;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_Y;
+    pAsm->S[0].src.swizzley = SQ_SEL_Y;
+    pAsm->S[0].src.swizzlez = SQ_SEL_Y;
+    pAsm->S[0].src.swizzlew = SQ_SEL_Y;
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
+    pAsm->D.dst.opcode   = SQ_OP3_INST_MUL_LIT;
+    pAsm->D.dst.op3      = 1;
+    pAsm->D.dst.rtype    = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg      = tmp;
+    pAsm->D.dst.writex   = 1;
+    pAsm->D.dst.writey   = 0;
+    pAsm->D.dst.writez   = 0;
+    pAsm->D.dst.writew   = 0;
+
+    pAsm->S[0].src.rtype = srcType;
+    pAsm->S[0].src.reg   = srcReg;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_W;
+    pAsm->S[0].src.swizzley = SQ_SEL_W;
+    pAsm->S[0].src.swizzlez = SQ_SEL_W;
+    pAsm->S[0].src.swizzlew = SQ_SEL_W;
+
+    /* Read back the logarithm stored in dst.z by the previous step. */
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = dstReg;
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[1].src));
+    pAsm->S[1].src.swizzlex = SQ_SEL_Z;
+    pAsm->S[1].src.swizzley = SQ_SEL_Z;
+    pAsm->S[1].src.swizzlez = SQ_SEL_Z;
+    pAsm->S[1].src.swizzlew = SQ_SEL_Z;
+
+    pAsm->S[2].src.rtype = srcType;
+    pAsm->S[2].src.reg   = srcReg;
+    setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[2].src));
+    pAsm->S[2].src.swizzlex = SQ_SEL_X;
+    pAsm->S[2].src.swizzley = SQ_SEL_X;
+    pAsm->S[2].src.swizzlez = SQ_SEL_X;
+    pAsm->S[2].src.swizzlew = SQ_SEL_X;
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* dst.z = exp(tmp.x) */
+    pAsm->D.dst.opcode   = SQ_OP2_INST_EXP_IEEE;
+    pAsm->D.dst.math     = 1;
+    pAsm->D.dst.rtype    = dstType;
+    pAsm->D.dst.reg      = dstReg;
+    pAsm->D.dst.writex   = 0;
+    pAsm->D.dst.writey   = 0;
+    pAsm->D.dst.writez   = 1;
+    pAsm->D.dst.writew   = 0;
+
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_X;
+    pAsm->S[0].src.swizzley = SQ_SEL_X;
+    pAsm->S[0].src.swizzlez = SQ_SEL_X;
+    pAsm->S[0].src.swizzlew = SQ_SEL_X;
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * MAX: one SQ_OP2_INST_MAX instruction with the IL destination and
+ * IL sources 0 and 1.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_MAX(r700_AssemblerBase *pAsm) 
+{
+    if (checkop2(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+    if (   assemble_dst(pAsm)        == GL_FALSE
+        || assemble_src(pAsm, 0, -1) == GL_FALSE
+        || assemble_src(pAsm, 1, -1) == GL_FALSE
+        || next_ins(pAsm)            == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * MIN: one SQ_OP2_INST_MIN instruction with the IL destination and
+ * IL sources 0 and 1.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_MIN(r700_AssemblerBase *pAsm) 
+{
+    if (checkop2(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
+
+    if (   assemble_dst(pAsm)        == GL_FALSE
+        || assemble_src(pAsm, 0, -1) == GL_FALSE
+        || assemble_src(pAsm, 1, -1) == GL_FALSE
+        || next_ins(pAsm)            == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * MOV: one SQ_OP2_INST_MOV instruction with the IL destination and
+ * IL source 0.  checkop1() is invoked without inspecting a return value.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_MOV(r700_AssemblerBase *pAsm) 
+{
+    checkop1(pAsm);
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if (   assemble_dst(pAsm)        == GL_FALSE
+        || assemble_src(pAsm, 0, -1) == GL_FALSE
+        || next_ins(pAsm)            == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * MUL: one SQ_OP2_INST_MUL instruction with the IL destination and
+ * IL sources 0 and 1.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_MUL(r700_AssemblerBase *pAsm) 
+{
+    if (checkop2(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+    if (   assemble_dst(pAsm)        == GL_FALSE
+        || assemble_src(pAsm, 0, -1) == GL_FALSE
+        || assemble_src(pAsm, 1, -1) == GL_FALSE
+        || next_ins(pAsm)            == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * POW: expanded into four instructions through one helper register:
+ *   1. LOG_IEEE  tmp = log(<IL src 0>)
+ *   2. MUL       tmp = tmp.x * <IL src 1>
+ *   3. EXP_IEEE  tmp = exp(tmp.x)
+ *   4. MOV       dst = tmp.x   (replicated to the destination channels)
+ *
+ * NOTE(review): checkop1() is used although two IL sources are read -
+ * confirm whether checkop2() is intended.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_POW(r700_AssemblerBase *pAsm) 
+{
+    BITS tmp;
+
+    checkop1(pAsm);
+
+    tmp = gethelpr(pAsm);
+
+    // LG2 tmp,       a.swizzle
+    pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = tmp;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    // MUL tmp,       tmp.x, b.swizzle
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = tmp;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = tmp;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    // EX2 tmp,       tmp.x
+    pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = tmp;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = tmp;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    // Now replicate result to all necessary channels in destination
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* A source operand takes a SRC_REG_* type, not a DST_REG_* one. */
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/* RCP: delegates to assemble_math_function() with SQ_OP2_INST_RECIP_IEEE. */
+GLboolean assemble_RCP(r700_AssemblerBase *pAsm) 
+{
+    const GLboolean ok = assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
+
+    return ok;
+}
+/* RSQ: delegates to assemble_math_function() with SQ_OP2_INST_RECIPSQRT_IEEE. */
+GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) 
+{
+    const GLboolean ok = assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
+
+    return ok;
+}
+/* SIN: delegates to assemble_math_function() with SQ_OP2_INST_SIN. */
+GLboolean assemble_SIN(r700_AssemblerBase *pAsm) 
+{
+    const GLboolean ok = assemble_math_function(pAsm, SQ_OP2_INST_SIN);
+
+    return ok;
+}
+/*
+ * SCS: expanded into three instructions through one helper register:
+ *   1. COS  tmp.x = cos(<IL src 0>)
+ *   2. SIN  tmp.y = sin(<IL src 0>)
+ *   3. MOV  dst   = (tmp.x, tmp.y, 0, 0)
+ *
+ * checkop1() is invoked without inspecting a return value.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_SCS(r700_AssemblerBase *pAsm) 
+{
+    BITS tmp;
+
+    checkop1(pAsm);
+
+    tmp = gethelpr(pAsm);
+
+    // COS tmp.x,    a.x
+    pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = tmp;
+    pAsm->D.dst.writex = 1;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // SIN tmp.y,    a.x
+    pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = tmp;
+    pAsm->D.dst.writey = 1;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MOV dst.mask,     tmp
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* A source operand takes a SRC_REG_* type, not a DST_REG_* one. */
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = tmp;
+
+    /* x and y pass through; z and w select the constant 0. */
+    noswizzle_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlez = SQ_SEL_0;
+    pAsm->S[0].src.swizzlew = SQ_SEL_0;
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * SGE: one SQ_OP2_INST_SETGE instruction with the IL destination and
+ * IL sources 0 and 1.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_SGE(r700_AssemblerBase *pAsm) 
+{
+    if (checkop2(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
+
+    if (   assemble_dst(pAsm)        == GL_FALSE
+        || assemble_src(pAsm, 0, -1) == GL_FALSE
+        || assemble_src(pAsm, 1, -1) == GL_FALSE
+        || next_ins(pAsm)            == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/*
+ * SLT: emitted as one SQ_OP2_INST_SETGT instruction.  The sources are
+ * passed to assemble_src() as (0, 1) and (1, 0).
+ * NOTE(review): presumably the third argument selects the hardware operand
+ * slot, so the two IL sources are exchanged and "a < b" is evaluated as
+ * "b > a" - confirm against assemble_src().
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_SLT(r700_AssemblerBase *pAsm) 
+{
+    if (checkop2(pAsm) == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
+
+    if (   assemble_dst(pAsm)       == GL_FALSE
+        || assemble_src(pAsm, 0, 1) == GL_FALSE
+        || assemble_src(pAsm, 1, 0) == GL_FALSE
+        || next_ins(pAsm)           == GL_FALSE) {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+/* STP: placeholder - emits nothing and always reports success. */
+GLboolean assemble_STP(r700_AssemblerBase *pAsm) 
+{
+    (void)pAsm; /* intentionally unused */
+
+    return GL_TRUE;
+}
+/*
+ * TEX / TXB / TXP: emit one texture fetch instruction.
+ *
+ * Texture coordinates held in a constant-class register file
+ * (PROGRAM_CONSTANT, PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM,
+ * PROGRAM_STATE_VAR) are not supported and are rejected with an error.
+ * OPCODE_TEX and OPCODE_TXP both map to SQ_TEX_INST_SAMPLE (TXP without
+ * the projective divide, see the TODO below); OPCODE_TXB is rejected.
+ *
+ * Source slot 1 carries the texture unit id with an identity swizzle.
+ * Returns GL_FALSE on any rejected case or helper failure, GL_TRUE otherwise.
+ */
+GLboolean assemble_TEX(r700_AssemblerBase *pAsm) 
+{
+    /* Defaults to "not constant" so that every register file is covered. */
+    GLboolean src_const = GL_FALSE;
+
+    switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
+    {
+    case PROGRAM_CONSTANT:
+    case PROGRAM_LOCAL_PARAM:
+    case PROGRAM_ENV_PARAM:
+    case PROGRAM_STATE_VAR:
+        src_const = GL_TRUE;
+        break; /* without this break the flag was immediately reset below */
+    case PROGRAM_TEMPORARY:
+    case PROGRAM_INPUT:
+    default:
+        src_const = GL_FALSE;
+        break;
+    }
+
+    if (GL_TRUE == src_const) 
+    {
+        r700_error(TODO_ASM_CONSTTEXADDR, "TODO: Texture coordinates from a constant register not supported.");
+        return GL_FALSE;
+    }
+
+    switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) 
+    {
+        case OPCODE_TEX:
+            pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+            break;
+        case OPCODE_TXB:
+            r700_error(TODO_ASM_TXB, "do not support TXB yet");
+            return GL_FALSE;
+        case OPCODE_TXP:
+            /* TODO : tex proj version : divide first 3 components by 4th */
+            pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+            break;
+        default:
+            r700_error(ERROR_ASM_BADTEXINST, "Internal error: bad texture op (not TEX)");
+            return GL_FALSE;
+    }
+
+    // Set src1 to tex unit id
+    pAsm->S[1].src.reg   = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+
+    //No sw info from mesa compiler, so hard code here.
+    pAsm->S[1].src.swizzlex = SQ_SEL_X;
+    pAsm->S[1].src.swizzley = SQ_SEL_Y;
+    pAsm->S[1].src.swizzlez = SQ_SEL_Z;
+    pAsm->S[1].src.swizzlew = SQ_SEL_W;
+
+    if( GL_FALSE == tex_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+    if( GL_FALSE == tex_src(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * XPD (cross product): expanded into up to three instructions:
+ *   1. MUL     tmp1 = a.zxy0 * b.yzx0
+ *   2. MULADD  out  = a.yzx0 * b.zxy0 + (negated) tmp1
+ *   3. MOV     dst  = tmp2            (only with a partial write mask)
+ *
+ * When the destination write mask is 0xF the MULADD writes the IL
+ * destination directly.  Otherwise it writes a second helper register
+ * (tmp2) that is then moved into the destination.
+ *
+ * The first product must stay addressable after the second helper is
+ * allocated, so the two helper registers are kept in separate variables;
+ * reusing one variable makes the MULADD addend point at the wrong register.
+ *
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean assemble_XPD(r700_AssemblerBase *pAsm) 
+{
+    BITS tmp1;      /* holds the first product            */
+    BITS tmp2 = 0;  /* MULADD result when a MOV is needed */
+    GLboolean bNeedMov;
+
+    if( GL_FALSE == checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    bNeedMov = (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) ? GL_TRUE : GL_FALSE;
+
+    tmp1 = gethelpr(pAsm);
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = tmp1;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
+    swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
+
+    if( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+    pAsm->D.dst.op3    = 1;
+
+    if(GL_TRUE == bNeedMov)
+    {
+        tmp2 = gethelpr(pAsm);
+
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp2;
+
+        nomask_PVSDST(&(pAsm->D.dst));
+    }
+    else 
+    {
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
+    swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
+
+    // result1 + (neg) result0
+    setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
+    pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[2].src.reg   = tmp1;
+
+    neg_PVSSRC(&(pAsm->S[2].src));
+    noswizzle_PVSSRC(&(pAsm->S[2].src));
+
+    if( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    if(GL_TRUE == bNeedMov) 
+    {
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        // Use tmp2 (the MULADD result) as source
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = tmp2;
+
+        noneg_PVSSRC(&(pAsm->S[0].src));
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/* EXPORT: placeholder - emits nothing and always reports success. */
+GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
+{
+    (void)pAsm; /* intentionally unused */
+
+    return GL_TRUE;
+}
+
+/* IF: placeholder - emits nothing and always reports success. */
+GLboolean assemble_IF(r700_AssemblerBase *pAsm)
+{
+    (void)pAsm; /* intentionally unused */
+
+    return GL_TRUE;
+}
+
+/* ENDIF: placeholder - emits nothing and always reports success. */
+GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
+{
+    (void)pAsm; /* intentionally unused */
+
+    return GL_TRUE;
+}
+
+/*
+ * Walk uiNumberInsts IL instructions and dispatch each one to its
+ * assemble_*() routine.
+ *
+ * pILInst is stored in the assembler and uiCurInst is updated before every
+ * dispatch.  Assembly stops with GL_TRUE at the first OPCODE_END, or after
+ * the last instruction.  It stops with GL_FALSE as soon as a routine
+ * fails, or on an opcode that is unknown or not implemented yet
+ * (ARL, ARR, EXP, LOG, ELSE).
+ */
+GLboolean AssembleInstr(GLuint uiNumberInsts,
+                        struct prog_instruction *pILInst, 
+                                               r700_AssemblerBase *pR700AsmCode)
+{
+    GLuint i;
+
+    pR700AsmCode->pILInst = pILInst;
+
+    for (i = 0; i < uiNumberInsts; i++) {
+        pR700AsmCode->uiCurInst = i;
+
+        switch (pILInst[i].Opcode) {
+        case OPCODE_ABS:
+            if (assemble_ABS(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_ADD:
+        case OPCODE_SUB:
+            if (assemble_ADD(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_ARL:
+            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARL ");
+            return GL_FALSE;
+        case OPCODE_ARR:
+            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARR ");
+            return GL_FALSE;
+
+        case OPCODE_CMP:
+            if (assemble_CMP(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_COS:
+            if (assemble_COS(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_DP3:
+        case OPCODE_DP4:
+        case OPCODE_DPH:
+            if (assemble_DOT(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_DST:
+            if (assemble_DST(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_EX2:
+            if (assemble_EX2(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_EXP: /* approx of EX2 */
+            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_EXP ");
+            return GL_FALSE;
+
+        case OPCODE_FLR:
+            if (assemble_FLR(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        /* OP_FLR_INT (assemble_FLR_INT) is not dispatched. */
+
+        case OPCODE_FRC:
+            if (assemble_FRC(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_KIL:
+            if (assemble_KIL(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_LG2:
+            if (assemble_LG2(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_LIT:
+            if (assemble_LIT(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_LRP:
+            if (assemble_LRP(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_LOG: /* approx of LG2 */
+            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_LOG ");
+            return GL_FALSE;
+
+        case OPCODE_MAD:
+            if (assemble_MAD(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_MAX:
+            if (assemble_MAX(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_MIN:
+            if (assemble_MIN(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_MOV:
+            if (assemble_MOV(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_MUL:
+            if (assemble_MUL(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_POW:
+            if (assemble_POW(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_RCP:
+            if (assemble_RCP(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_RSQ:
+            if (assemble_RSQ(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_SIN:
+            if (assemble_SIN(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_SCS:
+            if (assemble_SCS(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_SGE:
+            if (assemble_SGE(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_SLT:
+            if (assemble_SLT(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        /* OP_STP (assemble_STP) is not dispatched. */
+
+        case OPCODE_SWZ:
+            /* SWZ is assembled as a MOV. */
+            if (assemble_MOV(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            /* If a texture instruction follows directly, record a
+             * destination dependency on its entry. */
+            if (   (i + 1) < uiNumberInsts
+                && pILInst[i + 1].Opcode != OPCODE_END
+                && IsTex(pILInst[i + 1].Opcode) == GL_TRUE) {
+                pR700AsmCode->pInstDeps[i + 1].nDstDep = i + 1; //=1?
+            }
+            break;
+
+        case OPCODE_TEX:
+        case OPCODE_TXB:
+        case OPCODE_TXP:
+            if (assemble_TEX(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_XPD:
+            if (assemble_XPD(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_IF:
+            if (assemble_IF(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+        case OPCODE_ELSE:
+            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ELSE ");
+            return GL_FALSE;
+        case OPCODE_ENDIF:
+            if (assemble_ENDIF(pR700AsmCode) == GL_FALSE) {
+                return GL_FALSE;
+            }
+            break;
+
+        /* OPCODE_EXPORT (assemble_EXPORT) is not dispatched. */
+
+        case OPCODE_END:
+            /* Reminder: if a later export includes a depth/stencil
+             * export, a mov is needed to re-arrange the DST channels;
+             * this END instruction is meant to serve as the pseudo
+             * instruction for that. */
+            return GL_TRUE;
+
+        default:
+            r700_error(ERROR_ASM_UNKNOWNILINST, "internal: unknown instruction");
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/* Fill in one export CF instruction (pixel, position or parameter export).
+ *
+ * type                     - SQ_EXPORT_PIXEL, SQ_EXPORT_POS or SQ_EXPORT_PARAM.
+ * export_starting_index    - offset added to the array base of the export type
+ *                            (not used for depth, which targets SQ_CF_PIXEL_Z).
+ * export_count             - number of registers; stored as burst_count =
+ *                            export_count - 1, so it must be at least 1.
+ * starting_register_number - first GPR to export.
+ * is_depth_export          - GL_TRUE selects the Z target for pixel exports.
+ *
+ * Returns GL_FALSE for an unknown type, a zero export_count or a missing export
+ * clause. On success cf_last_export_ptr points at the instruction filled in.
+ */
+GLboolean Process_Export(r700_AssemblerBase* pAsm,
+                         GLuint type,
+                         GLuint export_starting_index,
+                         GLuint export_count, 
+                         GLuint starting_register_number,
+                         GLboolean is_depth_export)
+{
+    R700ControlFlowSXClause *pExport;
+    unsigned char ucWriteMask;
+
+    if (export_count == 0)
+    {
+        return GL_FALSE; // burst_count = export_count - 1 would wrap around
+    }
+
+    check_current_clause(pAsm, CF_EMPTY_CLAUSE);
+    check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
+
+    pExport = pAsm->cf_current_export_clause_ptr;
+    if (NULL == pExport)
+    {
+        return GL_FALSE; // no export clause to write into
+    }
+
+    pExport->m_Word0.f.type = type;
+
+    switch (type) 
+    {
+        case SQ_EXPORT_PIXEL:
+            pExport->m_Word0.f.array_base = (GL_TRUE == is_depth_export)
+                ? SQ_CF_PIXEL_Z : (SQ_CF_PIXEL_MRT0 + export_starting_index);
+            break;
+
+        case SQ_EXPORT_POS:
+            pExport->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
+            break;
+
+        case SQ_EXPORT_PARAM:
+            pExport->m_Word0.f.array_base = 0x0 + export_starting_index;
+            break;
+
+        default:
+            r700_error(ERROR_ASM_BADEXPORTTYPE, "Unknown export type: %d", type);
+            return GL_FALSE;
+    }
+
+    pExport->m_Word0.f.rw_gpr    = starting_register_number;
+    pExport->m_Word0.f.rw_rel    = SQ_ABSOLUTE;
+    pExport->m_Word0.f.index_gpr = 0x0;
+    pExport->m_Word0.f.elem_size = 0x3;
+
+    pExport->m_Word1.f.burst_count      = (export_count - 1);
+    pExport->m_Word1.f.end_of_program   = 0x0;
+    pExport->m_Word1.f.valid_pixel_mode = 0x0;
+    pExport->m_Word1.f.cf_inst          = SQ_CF_INST_EXPORT;  // callers switch the last export to _DONE
+    pExport->m_Word1.f.whole_quad_mode  = 0x0;
+    pExport->m_Word1.f.barrier          = 0x1;
+
+    if (export_count == 1) 
+    {
+        // pucOutMask is indexed relative to the first export register. For a
+        // register below that base the unsigned subtraction would wrap and read
+        // far out of bounds, so fall back to exporting all four channels.
+        if (starting_register_number >= pAsm->starting_export_register_number)
+        {
+            ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
+        }
+        else
+        {
+            ucWriteMask = 0xF;
+        }
+
+        // Written channels pass through; unwritten ones are masked out.
+        pExport->m_Word1_SWIZ.f.sel_x = (ucWriteMask & 0x1) ? SQ_SEL_X : SQ_SEL_MASK;
+        pExport->m_Word1_SWIZ.f.sel_y = (ucWriteMask & 0x2) ? SQ_SEL_Y : SQ_SEL_MASK;
+        pExport->m_Word1_SWIZ.f.sel_z = (ucWriteMask & 0x4) ? SQ_SEL_Z : SQ_SEL_MASK;
+        pExport->m_Word1_SWIZ.f.sel_w = (ucWriteMask & 0x8) ? SQ_SEL_W : SQ_SEL_MASK;
+    }
+    else 
+    {
+        // This should only be used if all components for all registers have been written
+        pExport->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
+        pExport->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
+        pExport->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
+        pExport->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
+    }
+
+    pAsm->cf_last_export_ptr = pExport;
+
+    return GL_TRUE;
+}
+
+/* Emit "MOV depth_reg.x, depth_reg.<depth_channel_select>" so that the depth value
+ * sits in the channel the hardware reads. The current IL instruction (expected to
+ * be OPCODE_END) is temporarily presented as a MOV while next_ins() runs and gets
+ * its opcode back afterwards. Returns GL_FALSE when next_ins() fails. */
+GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
+{
+    gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
+    GLboolean bEmitted;
+
+    pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
+
+    // Destination: X channel of the depth export register.
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = pAsm->depth_export_register_number;
+    pAsm->D.dst.writex = 1;   // depth goes in R channel for HW
+
+    // Source: the same register, with its swizzle set from depth_channel_select.
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; // was DST_REG_TEMPORARY: same value, wrong enum
+    pAsm->S[0].src.reg   = pAsm->depth_export_register_number;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    bEmitted = (GL_FALSE == next_ins(pAsm)) ? GL_FALSE : GL_TRUE;
+
+    // Restore the opcode on failure too, so the IL stream is never left rewritten.
+    pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
+
+    return bEmitted;
+}
+/* Emit the pixel exports of a fragment program: if a depth export register has
+ * been assigned, first move the depth value (Z channel) into place; then export
+ * colour and depth, each only when its bit is set in OutputsWritten; finally
+ * mark the last export emitted as EXPORT_DONE with end_of_program set.
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise. */
+GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
+                                   GLbitfield          OutputsWritten)  
+{ 
+    const unsigned int unColorBit = 1 << FRAG_RESULT_COLOR;
+    const unsigned int unDepthBit = 1 << FRAG_RESULT_DEPTH;
+
+    if( (pR700AsmCode->depth_export_register_number >= 0)
+     && (GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z)) )  // depth
+    {
+        return GL_FALSE;
+    }
+
+    // Colour goes to render target 0.
+    if( (OutputsWritten & unColorBit)
+     && (GL_FALSE == Process_Export(pR700AsmCode,
+                                    SQ_EXPORT_PIXEL,
+                                    0,
+                                    1,
+                                    pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
+                                    GL_FALSE)) )
+    {
+        return GL_FALSE;
+    }
+
+    // Depth uses the dedicated Z export target.
+    if( (OutputsWritten & unDepthBit)
+     && (GL_FALSE == Process_Export(pR700AsmCode,
+                                    SQ_EXPORT_PIXEL,
+                                    0,
+                                    1,
+                                    pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
+                                    GL_TRUE)) )
+    {
+        return GL_FALSE;
+    }
+
+    if(NULL != pR700AsmCode->cf_last_export_ptr)
+    {
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst        = SQ_CF_INST_EXPORT_DONE;
+    }
+    return GL_TRUE;
+}
+
+/* Emit the position and parameter exports of a vertex program.
+ *
+ * HPOS is exported first when written. COL0, COL1 and TEX0..TEX7 then go to
+ * consecutive parameter slots, in that order, each only when its bit is set in
+ * OutputsWritten. If number_of_exports (less the position export) is zero, a
+ * constant (0, 0, 0, 1) dummy parameter is exported instead. The last export
+ * emitted is marked EXPORT_DONE and ends the program.
+ *
+ * Side effect: number_of_exports is updated to exclude the position export.
+ * Returns GL_FALSE if an export cannot be emitted, GL_TRUE otherwise.
+ */
+GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
+                                 GLbitfield          OutputsWritten)  
+{
+    unsigned int unBit;
+    unsigned int i;
+    GLuint export_starting_index  = 0;
+    GLuint export_count           = pR700AsmCode->number_of_exports;
+
+    unBit = 1 << VERT_RESULT_HPOS;
+    if(OutputsWritten & unBit)
+    {
+        if( GL_FALSE == Process_Export(pR700AsmCode, SQ_EXPORT_POS, export_starting_index, 1,
+                                       pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS], GL_FALSE) )
+        {
+            return GL_FALSE;
+        }
+
+        if(export_count > 0) // unsigned: never let the count wrap below zero
+        {
+            export_count--;
+        }
+
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+    }
+
+    pR700AsmCode->number_of_exports = export_count;
+
+    unBit = 1 << VERT_RESULT_COL0;
+    if(OutputsWritten & unBit)
+    {
+        if( GL_FALSE == Process_Export(pR700AsmCode, SQ_EXPORT_PARAM, export_starting_index, 1,
+                                       pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0], GL_FALSE) )
+        {
+            return GL_FALSE;
+        }
+
+        export_starting_index++;
+    }
+
+    unBit = 1 << VERT_RESULT_COL1;
+    if(OutputsWritten & unBit)
+    {
+        if( GL_FALSE == Process_Export(pR700AsmCode, SQ_EXPORT_PARAM, export_starting_index, 1,
+                                       pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1], GL_FALSE) )
+        {
+            return GL_FALSE;
+        }
+
+        export_starting_index++;
+    }
+
+    for(i=0; i<8; i++)
+    {
+        unBit = 1 << (VERT_RESULT_TEX0 + i);
+        if(OutputsWritten & unBit)
+        {
+            if( GL_FALSE == Process_Export(pR700AsmCode, SQ_EXPORT_PARAM, export_starting_index, 1,
+                                           pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i], GL_FALSE) )
+            {
+                return GL_FALSE;
+            }
+
+            export_starting_index++;
+        }
+    }
+
+    // At least one param should be exported
+    if (export_count) 
+    {
+        if(NULL == pR700AsmCode->cf_last_export_ptr)
+        {
+            return GL_FALSE; // exports are expected but none was emitted
+        }
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+    }
+    else
+    {
+        // Dummy export: the source register must lie inside the export range
+        // because Process_Export() looks up its write mask relative to
+        // starting_export_register_number; passing 0 indexed out of bounds.
+        if( GL_FALSE == Process_Export(pR700AsmCode, SQ_EXPORT_PARAM, 0, 1,
+                                       pR700AsmCode->starting_export_register_number, GL_FALSE) )
+        {
+            return GL_FALSE;
+        }
+
+        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
+        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
+        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
+        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+    }
+
+    pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
+
+    return GL_TRUE;
+}
+
+GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
+{   // Free the assembler's buffers and clear the pointers so a repeated call cannot free them twice.
+    FREE(pR700AsmCode->pucOutMask); pR700AsmCode->pucOutMask = NULL;
+    FREE(pR700AsmCode->pInstDeps);  pR700AsmCode->pInstDeps  = NULL;
+    return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
new file mode 100644 (file)
index 0000000..e9b21b8
--- /dev/null
@@ -0,0 +1,512 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_ASSEMBLER_H_
+#define _R700_ASSEMBLER_H_
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "r700_chip.h"
+#include "r700_shaderinst.h"
+#include "r700_shader.h"
+
+typedef enum SHADER_PIPE_TYPE // type of r700_AssemblerBase.currentShaderType and of Init_r700_AssemblerBase()'s first argument
+{
+    SPT_VP = 0, // presumably "vertex program" -- TODO confirm
+    SPT_FP = 1  // presumably "fragment program" -- TODO confirm
+} SHADER_PIPE_TYPE;
+
+typedef enum ConstantCycles // array dimensions of the hw_gpr / hw_cfile_* tables in r700_AssemblerBase
+{
+    NUMBER_OF_CYCLES     = 3, // first dimension of hw_gpr
+    NUMBER_OF_COMPONENTS = 4  // second dimension of hw_gpr; length of hw_cfile_addr and hw_cfile_chan
+} ConstantCycles;
+
+typedef enum  HARDWARE_LIMIT_VALUES  // limits and offsets; the SQ_ALU_SRC_* values are defined outside this header
+{
+   TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE,
+   MAX_TEMPORARY_REGISTERS   = SQ_ALU_SRC_GPR_SIZE,
+   MAX_CONSTANT_REGISTERS    = SQ_ALU_SRC_CFILE_SIZE,
+   CFILE_REGISTER_OFFSET     = SQ_ALU_SRC_CFILE_BASE,
+   NUMBER_OF_INPUT_COLORS    = 2,  // length of r700_AssemblerBase.input_color_is_used
+   NUMBER_OF_OUTPUT_COLORS   = 8,  // length of r700_AssemblerBase.color_export_register_number
+   NUMBER_OF_TEXTURE_UNITS   = 16, // length of r700_AssemblerBase.input_texture_unit_is_used
+   MEGA_FETCH_BYTES          = 32
+} HARDWARE_LIMIT_VALUES;
+
+typedef enum AddressMode // values passed as "addrmode" to setaddrmode_PVSDST() / setaddrmode_PVSSRC()
+{
+    ADDR_ABSOLUTE          = 0,
+    ADDR_RELATIVE_A0       = 1, // presumably relative to the A0 address register -- TODO confirm
+    ADDR_RELATIVE_FLI_0    = 2,
+    NUMBER_OF_ADDR_MOD     = 3  // count of the modes above, not a mode itself
+} AddressMode;
+
+typedef enum SrcRegisterType // presumably the values stored in PVSSRC.rtype -- TODO confirm
+{
+    SRC_REG_TEMPORARY      = 0, // same numeric value as DST_REG_TEMPORARY
+    SRC_REG_INPUT          = 1,
+    SRC_REG_CONSTANT       = 2,
+    SRC_REG_ALT_TEMPORARY  = 3,
+    NUMBER_OF_SRC_REG_TYPE = 4  // count of the types above, not a type itself
+} SrcRegisterType;
+
+typedef enum DstRegisterType // values stored in PVSDST.rtype (r700_assembler.c assigns DST_REG_TEMPORARY to D.dst.rtype)
+{
+    DST_REG_TEMPORARY      = 0,
+    DST_REG_A0             = 1,
+    DST_REG_OUT            = 2,
+    DST_REG_OUT_X_REPL     = 3,
+    DST_REG_ALT_TEMPORARY  = 4,
+    DST_REG_INPUT          = 5,
+    NUMBER_OF_DST_REG_TYPE = 6  // count of the types above, not a type itself
+} DstRegisterType;
+
+typedef unsigned int BITS; // storage type of every bit-field in this header; also used for swizzle/opcode arguments
+
+typedef struct PVSDSTtag // destination operand descriptor; field widths add up to 32 bits
+{                        // the first trailing number on a line is the running bit total
+       BITS opcode:8;     //(:6)  //@@@ really should be 10 bits for OP2
+       BITS math:1;
+       BITS predicated:1; //10   //8
+       BITS pred_inv  :1; //11   //8
+
+       BITS rtype:3;      // a DstRegisterType value
+       BITS reg:10;       //24   //20
+
+       BITS writex:1;     // per-channel write enables
+       BITS writey:1;
+       BITS writez:1;
+       BITS writew:1;     //28
+
+       BITS op3:1;       // 29  Represents *_OP3_* ALU opcode
+
+       BITS dualop:1;    // 30  //26
+
+       BITS addrmode0:1; //31   //29
+       BITS addrmode1:1; //32
+} PVSDST;
+
+typedef struct PVSSRCtag // source operand descriptor; field widths add up to 32 bits
+{                        // the first trailing number on a line is the running bit total
+       BITS rtype:4;            
+       BITS addrmode0:1;        
+       BITS reg:10;      //15     (8)
+       BITS swizzlex:3;  // per-channel swizzle selectors, 3 bits each
+       BITS swizzley:3;
+       BITS swizzlez:3;
+       BITS swizzlew:3;  //27        
+
+       BITS negx:1;      // per-channel negate flags
+       BITS negy:1;
+       BITS negz:1;
+       BITS negw:1;      //31
+       //BITS addrsel:2;
+       BITS addrmode1:1; //32
+} PVSSRC;
+
+typedef struct PVSMATHtag // "math" view of a PVSDWORD; field widths add up to 32 bits
+{
+       BITS rtype:4;
+       BITS spare:1;
+       BITS reg:8;
+       BITS swizzlex:3;
+       BITS swizzley:3;
+       BITS dstoff:2; // 2 bits of dest offset into alt ram
+       BITS opcode:4;
+       BITS negx:1;
+       BITS negy:1;
+       BITS dstcomp:2; // select dest component
+       BITS spare2:3;
+} PVSMATH;
+
+typedef union PVSDWORDtag // alternative views of one operand word; used for r700_AssemblerBase.D and .S[3]
+{
+       BITS    bits; // raw value
+       PVSDST  dst;  // destination operand layout
+       PVSSRC  src;  // source operand layout
+       PVSMATH math; // math layout
+       float   f;    // the same storage read as a float
+} PVSDWORD;
+
+typedef struct VAP_OUT_VTX_FMT_0tag // one flag per vertex output component; widths add up to 32 bits
+{                                   // trailing numbers are the bit index of the member
+       BITS pos:1;      // 0
+       BITS misc:1;     // queried by is_misc_component_exported() -- presumably; confirm in r700_assembler.c
+       BITS clip_dist0:1;
+       BITS clip_dist1:1;
+       BITS pos_param:1; // 4
+
+       BITS color0:1;    // 5
+       BITS color1:1;
+       BITS color2:1;
+       BITS color3:1;
+       BITS color4:1;
+       BITS color5:1;
+       BITS color6:1;
+       BITS color7:1;
+
+       BITS normal:1;    
+
+       BITS depth:1;          // 14
+
+       BITS point_size:1;     // 15   
+       BITS edge_flag:1;      
+       BITS rta_index:1;      //     shares same channel as kill_flag
+       BITS kill_flag:1;
+       BITS viewport_index:1; // 19   
+
+       BITS resvd1:12;        // 20
+} VAP_OUT_VTX_FMT_0;
+
+typedef struct VAP_OUT_VTX_FMT_1tag // 3-bit field per texture coordinate set 0..7, plus 8 reserved bits (32 in total)
+{
+       BITS tex0comp:3; // presumably a component count for texcoord 0 -- TODO confirm
+       BITS tex1comp:3;
+       BITS tex2comp:3;
+       BITS tex3comp:3;
+       BITS tex4comp:3;
+       BITS tex5comp:3;
+       BITS tex6comp:3;
+       BITS tex7comp:3;
+
+       BITS resvd:8;
+} VAP_OUT_VTX_FMT_1;
+
+typedef struct VAP_OUT_VTX_FMT_2tag // same layout as VAP_OUT_VTX_FMT_1, for texture coordinate sets 8..15
+{
+       BITS tex8comp :3;
+       BITS tex9comp :3;
+       BITS tex10comp:3;
+       BITS tex11comp:3;
+       BITS tex12comp:3;
+       BITS tex13comp:3;
+       BITS tex14comp:3;
+       BITS tex15comp:3;
+
+       BITS resvd:8;
+} VAP_OUT_VTX_FMT_2;
+
+typedef struct OUT_FRAGMENT_FMT_0tag // one flag per fragment output; type of r700_AssemblerBase.fp_stOutFmt0 (32 bits in total)
+{
+       BITS color0:1;
+       BITS color1:1;
+       BITS color2:1;
+       BITS color3:1;
+       BITS color4:1;
+       BITS color5:1;
+       BITS color6:1;
+       BITS color7:1;
+
+       BITS depth:1;        // presumably what is_depth_component_exported() reports -- TODO confirm
+       BITS stencil_ref:1;
+       BITS coverage_to_mask:1;
+       BITS mask:1;
+
+       BITS resvd1:20;
+} OUT_FRAGMENT_FMT_0;
+
+typedef enum  CF_CLAUSE_TYPE // clause kinds passed to check_current_clause(); type of cf_current_clause_type
+{
+   CF_EXPORT_CLAUSE, // requested by Process_Export() before it fills in an export instruction
+   CF_ALU_CLAUSE,
+   CF_TEX_CLAUSE,
+   CF_VTX_CLAUSE,
+   CF_OTHER_CLAUSE,
+   CF_EMPTY_CLAUSE,  // Process_Export() requests this first, then CF_EXPORT_CLAUSE
+   NUMBER_CF_CLAUSE_TYPES // count of the kinds above, not a kind itself
+} CF_CLAUSE_TYPE;
+
+enum // unnamed bag of unrelated constants; values repeat across groups (FC_NONE == COND_NONE == 0)
+{
+    MAX_BOOL_CONSTANTS   = 32,
+    MAX_INT_CONSTANTS    = 32,
+    MAX_FLOAT_CONSTANTS  = 256,
+
+    FC_NONE = 0, // FC_*: presumably values for FC_LEVEL.type -- TODO confirm
+    FC_IF = 1,
+    FC_LOOP = 2,
+    FC_REP = 3,
+
+    COND_NONE = 0, // COND_*: presumably values for FC_LEVEL.cond -- TODO confirm
+    COND_BOOL = 1,
+    COND_PRED = 2,
+    COND_ALU = 3,
+
+    SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
+    SAFEDIST_ALU = 6 ///< the same for alu->fc
+};
+
+typedef struct FC_LEVEL // one flow-control nesting level; r700_AssemblerBase keeps 32 of them in fc_stack
+{
+       unsigned int           first; ///< first fc instruction on level (if, rep, loop)
+       unsigned int*          mid; ///< middle instructions - else or all breaks on this level
+       unsigned int           midLen; ///< presumably the number of entries in mid -- TODO confirm
+       unsigned int           type;
+       unsigned int           cond;
+       unsigned int           inv;
+       unsigned int           bpush; ///< 1 if first instruction does branch stack push
+                        int           id; ///< id of bool or int variable
+} FC_LEVEL;
+
+typedef struct VTX_FETCH_METHOD // fetch state handed to assemble_vfetch_instruction() through pFetchMethod
+{
+       GLboolean bEnableMini;
+       GLuint mega_fetch_remainder; // presumably bytes left of a MEGA_FETCH_BYTES fetch -- TODO confirm
+} VTX_FETCH_METHOD;
+
+typedef struct r700_AssemblerBase // working state of the shader assembler, passed to every assemble_* / Process_* routine
+{
+       R700ControlFlowSXClause*      cf_last_export_ptr; // most recent export instruction; set by Process_Export()
+       R700ControlFlowSXClause*      cf_current_export_clause_ptr; // export instruction Process_Export() writes into
+       R700ControlFlowALUClause*     cf_current_alu_clause_ptr;
+       R700ControlFlowGenericClause* cf_current_tex_clause_ptr;
+       R700ControlFlowGenericClause* cf_current_vtx_clause_ptr;
+       R700ControlFlowGenericClause* cf_current_cf_clause_ptr;
+
+    //Result shader
+    R700_Shader * pR700Shader;
+
+       // Kind of clause currently open (presumably CF_EMPTY_CLAUSE while no clause has been created yet -- confirm)
+       CF_CLAUSE_TYPE cf_current_clause_type;
+
+       GLuint number_of_exports; // Process_Vertex_Exports() subtracts the position export from this
+       GLuint number_of_colorandz_exports;
+       GLuint number_of_export_opcodes;
+
+       PVSDWORD D;    // destination operand of the instruction being assembled
+       PVSDWORD S[3]; // up to three source operands
+
+       unsigned int uLastPosUpdate;
+
+       OUT_FRAGMENT_FMT_0     fp_stOutFmt0;
+
+       unsigned int uIIns;
+       unsigned int uOIns;
+       unsigned int number_used_registers;
+       unsigned int uUsedConsts; 
+
+       // Fragment programs
+       unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
+       unsigned int uiFP_OutputMap[FRAG_RESULT_MAX]; // FRAG_RESULT_* -> register number handed to Process_Export()
+       unsigned int uBoolConsts;
+       unsigned int uIntConsts;
+       unsigned int uInsts;
+       unsigned int uConsts;
+
+       // Vertex programs
+       unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX];
+       unsigned char ucVP_OutputMap[VERT_RESULT_MAX]; // VERT_RESULT_* -> register number handed to Process_Export()
+
+    unsigned char * pucOutMask; // write masks indexed by (register - starting_export_register_number); freed by Clean_Up_Assembler()
+
+       //-----------------------------------------------------------------------------------
+       // flow control members
+       //-----------------------------------------------------------------------------------
+       unsigned int FCSP;
+       FC_LEVEL fc_stack[32];
+
+       unsigned int branch_depth;
+       unsigned int max_branch_depth;
+
+       //-----------------------------------------------------------------------------------
+       // ArgSubst used in Assemble_Source() function
+       //-----------------------------------------------------------------------------------
+       int aArgSubst[4];
+
+    GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ];
+    GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ];
+    GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ];
+
+    GLuint uOutputs;
+  
+    GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS];
+       GLint depth_export_register_number; // Process_Fragment_Exports() treats a negative value as "no depth export"
+
+       GLint stencil_export_register_number;
+       GLint coverage_to_mask_export_register_number;
+       GLint mask_export_register_number;
+
+       GLuint starting_export_register_number; // base subtracted when indexing pucOutMask
+       GLuint starting_vfetch_register_number;
+       GLuint starting_temp_register_number;
+       GLuint uHelpReg;
+       GLuint uFirstHelpReg;
+
+       GLboolean input_position_is_used;
+       GLboolean input_normal_is_used;
+
+       GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS];
+  
+       GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS];
+  
+    R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX];
+  
+       GLuint number_of_inputs;
+
+    InstDeps *pInstDeps; // freed by Clean_Up_Assembler()
+
+    SHADER_PIPE_TYPE currentShaderType;
+    struct prog_instruction * pILInst; // IL instruction array being assembled
+    GLuint             uiCurInst;      // index of the current instruction in pILInst
+    GLboolean   bR6xx;
+} r700_AssemblerBase;
+
+//Internal use: accessors for the PVSDST / PVSSRC operand descriptors and output format queries
+BITS addrmode_PVSDST(PVSDST * pPVSDST);
+void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode); // addrmode: an AddressMode value
+void nomask_PVSDST(PVSDST * pPVSDST);
+BITS addrmode_PVSSRC(PVSSRC* pPVSSRC);
+void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode); // addrmode: an AddressMode value
+void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz);
+void noswizzle_PVSSRC(PVSSRC* pPVSSRC);
+void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w);
+void neg_PVSSRC(PVSSRC* pPVSSRC);
+void noneg_PVSSRC(PVSSRC* pPVSSRC);
+void flipneg_PVSSRC(PVSSRC* pPVSSRC);
+void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c);
+void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c);
+BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0);
+BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
+GLboolean is_reduction_opcode(PVSDWORD * dest);
+GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
+
+unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm);
+
+GLboolean IsTex(gl_inst_opcode Opcode);
+GLboolean IsAlu(gl_inst_opcode Opcode);
+int check_current_clause(r700_AssemblerBase* pAsm, // NOTE(review): returns int, and Process_Export() ignores the result -- confirm meaning
+                                            CF_CLAUSE_TYPE      new_clause_type);
+GLboolean add_vfetch_instruction(r700_AssemblerBase*     pAsm,
+                                                                R700VertexInstruction*  vertex_instruction_ptr);
+GLboolean add_tex_instruction(r700_AssemblerBase*     pAsm,
+                              R700TextureInstruction* tex_instruction_ptr);
+GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+                                                               GLuint gl_client_id,
+                                GLuint destination_register,
+                                                               GLuint number_of_elements,
+                                GLenum dataElementType,
+                                                               VTX_FETCH_METHOD* pFetchMethod);
+GLuint gethelpr(r700_AssemblerBase* pAsm);
+void resethelpr(r700_AssemblerBase* pAsm);
+void checkop_init(r700_AssemblerBase* pAsm);
+GLboolean mov_temp(r700_AssemblerBase* pAsm, int src);
+GLboolean checkop1(r700_AssemblerBase* pAsm);
+GLboolean checkop2(r700_AssemblerBase* pAsm);
+GLboolean checkop3(r700_AssemblerBase* pAsm);
+GLboolean assemble_src(r700_AssemblerBase *pAsm,
+                       int src, 
+                       int fld);
+GLboolean assemble_dst(r700_AssemblerBase *pAsm);
+GLboolean tex_dst(r700_AssemblerBase *pAsm);
+GLboolean tex_src(r700_AssemblerBase *pAsm);
+GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm);
+void initialize(r700_AssemblerBase *pAsm);
+GLboolean assemble_alu_src(R700ALUInstruction*  alu_instruction_ptr,
+                           int                  source_index,
+                           PVSSRC*              pSource,
+                           BITS                 scalar_channel_index);
+GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
+                              R700ALUInstruction* alu_instruction_ptr,
+                              GLuint              contiguous_slots_needed);
+void get_src_properties(R700ALUInstruction*  alu_instruction_ptr,
+                        int                  source_index,
+                        BITS*                psrc_sel,
+                        BITS*                psrc_rel,
+                        BITS*                psrc_chan,
+                        BITS*                psrc_neg);
+int is_cfile(BITS sel);
+int is_const(BITS sel);
+int is_gpr(BITS sel);
+GLboolean reserve_cfile(r700_AssemblerBase* pAsm, 
+                        GLuint sel, 
+                        GLuint chan);
+GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle);
+GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
+GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
+GLboolean check_scalar(r700_AssemblerBase* pAsm,
+                       R700ALUInstruction* alu_instruction_ptr);
+GLboolean check_vector(r700_AssemblerBase* pAsm,
+                       R700ALUInstruction* alu_instruction_ptr);
+GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
+GLboolean next_ins(r700_AssemblerBase *pAsm); // emits the instruction described by pAsm->D / pAsm->S; GL_FALSE on failure
+GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
+GLboolean assemble_ABS(r700_AssemblerBase *pAsm); // assemble_<OP>: one routine per IL opcode
+GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
+GLboolean assemble_BAD(char *opcode_str); // NOTE(review): non-const char*; consider const if callers pass literals -- confirm
+GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
+GLboolean assemble_COS(r700_AssemblerBase *pAsm);
+GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
+GLboolean assemble_DST(r700_AssemblerBase *pAsm);
+GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
+GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
+GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
+GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
+GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
+GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
+GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
+GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
+GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
+GLboolean assemble_MIN(r700_AssemblerBase *pAsm);
+GLboolean assemble_MOV(r700_AssemblerBase *pAsm);
+GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
+GLboolean assemble_POW(r700_AssemblerBase *pAsm);
+GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
+GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
+GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
+GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
+GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
+GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
+GLboolean assemble_STP(r700_AssemblerBase *pAsm);
+GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
+GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
+GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm); // its OPCODE_EXPORT case in AssembleInstr() is commented out
+GLboolean assemble_IF(r700_AssemblerBase *pAsm);
+GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
+
+GLboolean Process_Export(r700_AssemblerBase* pAsm, // fills in one export instruction and records it in cf_last_export_ptr
+                         GLuint type,              // SQ_EXPORT_PIXEL, SQ_EXPORT_POS or SQ_EXPORT_PARAM
+                         GLuint export_starting_index,
+                         GLuint export_count,      // stored as burst_count = export_count - 1
+                         GLuint starting_register_number,
+                         GLboolean is_depth_export);
+GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, // emits a MOV inside the depth export register
+                                                 BITS depth_channel_select);
+
+
+//Interface: entry points called from outside the assembler
+GLboolean AssembleInstr(GLuint uiNumberInsts,
+                        struct prog_instruction *pILInst, 
+                                               r700_AssemblerBase *pR700AsmCode);
+GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);  // OutputsWritten: bit per FRAG_RESULT_*
+GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);    // OutputsWritten: bit per VERT_RESULT_*
+
+int       Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
+GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode); // frees pucOutMask and pInstDeps; always returns GL_TRUE
+
+#endif //_R700_ASSEMBLER_H_
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
new file mode 100644 (file)
index 0000000..d0371cd
--- /dev/null
@@ -0,0 +1,420 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/imports.h"
+#include "main/glheader.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+#include "r700_state.h"
+#include "r700_tex.h"
+#include "r700_oglprog.h"
+#include "r700_ioctl.h"
+/* to be enabled
+#include "r700_emit.h"
+*/
+
+extern const struct tnl_pipeline_stage *r700_pipeline[];
+
+/*
+ * Release an R700 chip object created by r700InitChipObject().
+ *
+ * pvChipObj: opaque pointer to an R700_CHIP_CONTEXT, or NULL (no-op).
+ * Frees the register state list first, then the chip context itself.
+ * Always returns GL_TRUE.
+ */
+static GLboolean r700DestroyChipObj(void* pvChipObj)
+{
+    R700_CHIP_CONTEXT *pChip = (R700_CHIP_CONTEXT *)pvChipObj;
+
+    if(NULL != pChip)
+    {
+        /* the list is owned by the chip object, so it goes first */
+        FREE(pChip->pStateList);
+        FREE(pChip);
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Populate the driver function table for R700.
+ *
+ * Passes the same table, in this order, to the state, texture, shader and
+ * ioctl initialisers.
+ * NOTE(review): whether a later initialiser overrides entries set by an
+ * earlier one is not visible here - keep the call order unless confirmed.
+ */
+static void r700InitFuncs(struct dd_function_table *functions)
+{
+    r700InitStateFuncs(functions);
+    r700InitTextureFuncs(functions);
+    r700InitShaderFuncs(functions);
+    r700InitIoctlFuncs(functions);
+}
+
+/*
+ * Append register `reg` to the state list being built.
+ *
+ * Relies on two names in the expanding scope:
+ *   r700           - the R700_CHIP_CONTEXT holding a member named `reg`
+ *   pStateListWork - cursor into the ContextState array; advanced by one
+ * The new entry points at r700->reg, stores the register index relative to
+ * ASIC_CONTEXT_BASE_INDEX (mm<reg> - base), and links to the following
+ * array slot.
+ */
+#define LINK_STATES(reg)                                                  \
+do                                                                        \
+{                                                                         \
+    ContextState *pLinkEntry_ = pStateListWork++;                         \
+    pLinkEntry_->puiValue = (unsigned int*)&(r700->reg);                  \
+    pLinkEntry_->unOffset = mm##reg - ASIC_CONTEXT_BASE_INDEX;            \
+    pLinkEntry_->pNext    = pStateListWork;                               \
+}while(0)
+
+/*
+ * Create the R700 chip object for `context` and build its register state list.
+ *
+ * Allocates a zeroed R700_CHIP_CONTEXT, installs the R700 hooks in
+ * context->chipobj, then allocates an array of ContextState entries (one per
+ * unsigned-int-sized word of R700_CHIP_CONTEXT) and links one entry per
+ * context register via LINK_STATES. The entries are linked in the order
+ * written below; the final entry has pNext == NULL.
+ *
+ * Returns GL_TRUE on success. Returns GL_FALSE if either allocation fails;
+ * in that case context->chipobj.pvChipObj is NULL and nothing is leaked
+ * (r700DestroyChipObj accepts a NULL object).
+ */
+GLboolean r700InitChipObject(context_t *context)
+{
+    ContextState * pStateListWork;
+
+    R700_CHIP_CONTEXT *r700 = CALLOC( sizeof(R700_CHIP_CONTEXT) );
+
+    /* Hooks are installed even if the allocation failed, so the caller sees a
+     * fully populated chipobj with a NULL pvChipObj on the error path. */
+    context->chipobj.pvChipObj = (void*)r700;
+
+    context->chipobj.DestroyChipObj = r700DestroyChipObj;
+
+    context->chipobj.GetTexObjSize  = r700GetTexObjSize;
+
+    context->chipobj.stages = r700_pipeline;
+
+    context->chipobj.InitFuncs = r700InitFuncs;
+
+    context->chipobj.InitState = r700InitState;
+
+    if(NULL == r700)
+    {
+        return GL_FALSE;
+    }
+
+    /* init state list */
+    r700->pStateList = (ContextState*) MALLOC (sizeof(ContextState)*sizeof(R700_CHIP_CONTEXT)/sizeof(unsigned int));
+    if(NULL == r700->pStateList)
+    {
+        /* undo the partial construction so no dangling pointer is published */
+        FREE(r700);
+        context->chipobj.pvChipObj = NULL;
+        return GL_FALSE;
+    }
+    pStateListWork = r700->pStateList;
+
+    LINK_STATES(DB_DEPTH_SIZE);
+    LINK_STATES(DB_DEPTH_VIEW);
+
+    LINK_STATES(DB_DEPTH_BASE);
+    LINK_STATES(DB_DEPTH_INFO);
+    LINK_STATES(DB_HTILE_DATA_BASE);
+
+    LINK_STATES(DB_STENCIL_CLEAR);
+    LINK_STATES(DB_DEPTH_CLEAR);
+
+    LINK_STATES(PA_SC_SCREEN_SCISSOR_TL);
+    LINK_STATES(PA_SC_SCREEN_SCISSOR_BR);
+
+    LINK_STATES(CB_COLOR0_BASE);
+
+    LINK_STATES(CB_COLOR0_SIZE);
+
+    LINK_STATES(CB_COLOR0_VIEW);
+
+    LINK_STATES(CB_COLOR0_INFO);
+    LINK_STATES(CB_COLOR1_INFO);
+    LINK_STATES(CB_COLOR2_INFO);
+    LINK_STATES(CB_COLOR3_INFO);
+    LINK_STATES(CB_COLOR4_INFO);
+    LINK_STATES(CB_COLOR5_INFO);
+    LINK_STATES(CB_COLOR6_INFO);
+    LINK_STATES(CB_COLOR7_INFO);
+
+    LINK_STATES(CB_COLOR0_TILE);
+
+    LINK_STATES(CB_COLOR0_FRAG);
+
+    LINK_STATES(CB_COLOR0_MASK);
+
+    LINK_STATES(PA_SC_WINDOW_OFFSET);
+    LINK_STATES(PA_SC_WINDOW_SCISSOR_TL);
+    LINK_STATES(PA_SC_WINDOW_SCISSOR_BR);
+    LINK_STATES(PA_SC_CLIPRECT_RULE);
+    LINK_STATES(PA_SC_CLIPRECT_0_TL);
+    LINK_STATES(PA_SC_CLIPRECT_0_BR);
+    LINK_STATES(PA_SC_CLIPRECT_1_TL);
+    LINK_STATES(PA_SC_CLIPRECT_1_BR);
+    LINK_STATES(PA_SC_CLIPRECT_2_TL);
+    LINK_STATES(PA_SC_CLIPRECT_2_BR);
+    LINK_STATES(PA_SC_CLIPRECT_3_TL);
+    LINK_STATES(PA_SC_CLIPRECT_3_BR);
+
+    LINK_STATES(PA_SC_EDGERULE);
+
+    LINK_STATES(CB_TARGET_MASK);
+    LINK_STATES(CB_SHADER_MASK);
+    LINK_STATES(PA_SC_GENERIC_SCISSOR_TL);
+    LINK_STATES(PA_SC_GENERIC_SCISSOR_BR);
+
+    LINK_STATES(PA_SC_VPORT_SCISSOR_0_TL);
+    LINK_STATES(PA_SC_VPORT_SCISSOR_0_BR);
+    LINK_STATES(PA_SC_VPORT_SCISSOR_1_TL);
+    LINK_STATES(PA_SC_VPORT_SCISSOR_1_BR);
+
+    LINK_STATES(PA_SC_VPORT_ZMIN_0);
+    LINK_STATES(PA_SC_VPORT_ZMAX_0);
+
+    LINK_STATES(SX_MISC);
+
+    LINK_STATES(SQ_VTX_SEMANTIC_0);
+    LINK_STATES(SQ_VTX_SEMANTIC_1);
+    LINK_STATES(SQ_VTX_SEMANTIC_2);
+    LINK_STATES(SQ_VTX_SEMANTIC_3);
+    LINK_STATES(SQ_VTX_SEMANTIC_4);
+    LINK_STATES(SQ_VTX_SEMANTIC_5);
+    LINK_STATES(SQ_VTX_SEMANTIC_6);
+    LINK_STATES(SQ_VTX_SEMANTIC_7);
+    LINK_STATES(SQ_VTX_SEMANTIC_8);
+    LINK_STATES(SQ_VTX_SEMANTIC_9);
+    LINK_STATES(SQ_VTX_SEMANTIC_10);
+    LINK_STATES(SQ_VTX_SEMANTIC_11);
+    LINK_STATES(SQ_VTX_SEMANTIC_12);
+    LINK_STATES(SQ_VTX_SEMANTIC_13);
+    LINK_STATES(SQ_VTX_SEMANTIC_14);
+    LINK_STATES(SQ_VTX_SEMANTIC_15);
+    LINK_STATES(SQ_VTX_SEMANTIC_16);
+    LINK_STATES(SQ_VTX_SEMANTIC_17);
+    LINK_STATES(SQ_VTX_SEMANTIC_18);
+    LINK_STATES(SQ_VTX_SEMANTIC_19);
+    LINK_STATES(SQ_VTX_SEMANTIC_20);
+    LINK_STATES(SQ_VTX_SEMANTIC_21);
+    LINK_STATES(SQ_VTX_SEMANTIC_22);
+    LINK_STATES(SQ_VTX_SEMANTIC_23);
+    LINK_STATES(SQ_VTX_SEMANTIC_24);
+    LINK_STATES(SQ_VTX_SEMANTIC_25);
+    LINK_STATES(SQ_VTX_SEMANTIC_26);
+    LINK_STATES(SQ_VTX_SEMANTIC_27);
+    LINK_STATES(SQ_VTX_SEMANTIC_28);
+    LINK_STATES(SQ_VTX_SEMANTIC_29);
+    LINK_STATES(SQ_VTX_SEMANTIC_30);
+    LINK_STATES(SQ_VTX_SEMANTIC_31);
+
+    LINK_STATES(VGT_MAX_VTX_INDX);
+    LINK_STATES(VGT_MIN_VTX_INDX);
+    LINK_STATES(VGT_INDX_OFFSET);
+    LINK_STATES(VGT_MULTI_PRIM_IB_RESET_INDX);
+    LINK_STATES(SX_ALPHA_TEST_CONTROL);
+
+    LINK_STATES(CB_BLEND_RED);
+    LINK_STATES(CB_BLEND_GREEN);
+    LINK_STATES(CB_BLEND_BLUE);
+    LINK_STATES(CB_BLEND_ALPHA);
+
+    LINK_STATES(PA_CL_VPORT_XSCALE);
+    LINK_STATES(PA_CL_VPORT_XOFFSET);
+    LINK_STATES(PA_CL_VPORT_YSCALE);
+    LINK_STATES(PA_CL_VPORT_YOFFSET);
+    LINK_STATES(PA_CL_VPORT_ZSCALE);
+    LINK_STATES(PA_CL_VPORT_ZOFFSET);
+
+    LINK_STATES(SPI_VS_OUT_ID_0);
+    LINK_STATES(SPI_VS_OUT_ID_1);
+    LINK_STATES(SPI_VS_OUT_ID_2);
+    LINK_STATES(SPI_VS_OUT_ID_3);
+    LINK_STATES(SPI_VS_OUT_ID_4);
+    LINK_STATES(SPI_VS_OUT_ID_5);
+    LINK_STATES(SPI_VS_OUT_ID_6);
+    LINK_STATES(SPI_VS_OUT_ID_7);
+    LINK_STATES(SPI_VS_OUT_ID_8);
+    LINK_STATES(SPI_VS_OUT_ID_9);
+
+    LINK_STATES(SPI_PS_INPUT_CNTL_0);
+    LINK_STATES(SPI_PS_INPUT_CNTL_1);
+    LINK_STATES(SPI_PS_INPUT_CNTL_2);
+    LINK_STATES(SPI_PS_INPUT_CNTL_3);
+    LINK_STATES(SPI_PS_INPUT_CNTL_4);
+    LINK_STATES(SPI_PS_INPUT_CNTL_5);
+    LINK_STATES(SPI_PS_INPUT_CNTL_6);
+    LINK_STATES(SPI_PS_INPUT_CNTL_7);
+    LINK_STATES(SPI_PS_INPUT_CNTL_8);
+    LINK_STATES(SPI_PS_INPUT_CNTL_9);
+    LINK_STATES(SPI_PS_INPUT_CNTL_10);
+    LINK_STATES(SPI_PS_INPUT_CNTL_11);
+    LINK_STATES(SPI_PS_INPUT_CNTL_12);
+    LINK_STATES(SPI_PS_INPUT_CNTL_13);
+    LINK_STATES(SPI_PS_INPUT_CNTL_14);
+    LINK_STATES(SPI_PS_INPUT_CNTL_15);
+    LINK_STATES(SPI_PS_INPUT_CNTL_16);
+    LINK_STATES(SPI_PS_INPUT_CNTL_17);
+    LINK_STATES(SPI_PS_INPUT_CNTL_18);
+    LINK_STATES(SPI_PS_INPUT_CNTL_19);
+    LINK_STATES(SPI_PS_INPUT_CNTL_20);
+    LINK_STATES(SPI_PS_INPUT_CNTL_21);
+    LINK_STATES(SPI_PS_INPUT_CNTL_22);
+    LINK_STATES(SPI_PS_INPUT_CNTL_23);
+    LINK_STATES(SPI_PS_INPUT_CNTL_24);
+    LINK_STATES(SPI_PS_INPUT_CNTL_25);
+    LINK_STATES(SPI_PS_INPUT_CNTL_26);
+    LINK_STATES(SPI_PS_INPUT_CNTL_27);
+    LINK_STATES(SPI_PS_INPUT_CNTL_28);
+    LINK_STATES(SPI_PS_INPUT_CNTL_29);
+    LINK_STATES(SPI_PS_INPUT_CNTL_30);
+    LINK_STATES(SPI_PS_INPUT_CNTL_31);
+    LINK_STATES(SPI_VS_OUT_CONFIG);
+    LINK_STATES(SPI_THREAD_GROUPING);
+    LINK_STATES(SPI_PS_IN_CONTROL_0);
+    LINK_STATES(SPI_PS_IN_CONTROL_1);
+
+    LINK_STATES(SPI_INPUT_Z);
+    LINK_STATES(SPI_FOG_CNTL);
+
+    LINK_STATES(CB_BLEND0_CONTROL);
+
+    LINK_STATES(CB_SHADER_CONTROL);
+
+    /*LINK_STATES(VGT_DRAW_INITIATOR);  */
+
+    LINK_STATES(DB_DEPTH_CONTROL);
+
+    LINK_STATES(CB_COLOR_CONTROL);
+    LINK_STATES(DB_SHADER_CONTROL);
+    LINK_STATES(PA_CL_CLIP_CNTL);
+    LINK_STATES(PA_SU_SC_MODE_CNTL);
+    LINK_STATES(PA_CL_VTE_CNTL);
+    LINK_STATES(PA_CL_VS_OUT_CNTL);
+    LINK_STATES(PA_CL_NANINF_CNTL);
+
+    LINK_STATES(SQ_PGM_START_PS);
+    LINK_STATES(SQ_PGM_RESOURCES_PS);
+    LINK_STATES(SQ_PGM_EXPORTS_PS);
+    LINK_STATES(SQ_PGM_START_VS);
+    LINK_STATES(SQ_PGM_RESOURCES_VS);
+    LINK_STATES(SQ_PGM_START_GS);
+    LINK_STATES(SQ_PGM_RESOURCES_GS);
+    LINK_STATES(SQ_PGM_START_ES);
+    LINK_STATES(SQ_PGM_RESOURCES_ES);
+    LINK_STATES(SQ_PGM_START_FS);
+    LINK_STATES(SQ_PGM_RESOURCES_FS);
+    LINK_STATES(SQ_ESGS_RING_ITEMSIZE);
+    LINK_STATES(SQ_GSVS_RING_ITEMSIZE);
+    LINK_STATES(SQ_ESTMP_RING_ITEMSIZE);
+    LINK_STATES(SQ_GSTMP_RING_ITEMSIZE);
+    LINK_STATES(SQ_VSTMP_RING_ITEMSIZE);
+    LINK_STATES(SQ_PSTMP_RING_ITEMSIZE);
+    LINK_STATES(SQ_FBUF_RING_ITEMSIZE);
+    LINK_STATES(SQ_REDUC_RING_ITEMSIZE);
+    LINK_STATES(SQ_GS_VERT_ITEMSIZE);
+    LINK_STATES(SQ_PGM_CF_OFFSET_PS);
+    LINK_STATES(SQ_PGM_CF_OFFSET_VS);
+    LINK_STATES(SQ_PGM_CF_OFFSET_GS);
+    LINK_STATES(SQ_PGM_CF_OFFSET_ES);
+    LINK_STATES(SQ_PGM_CF_OFFSET_FS);
+
+    LINK_STATES(PA_SU_POINT_SIZE);
+    LINK_STATES(PA_SU_POINT_MINMAX);
+    LINK_STATES(PA_SU_LINE_CNTL);
+    LINK_STATES(PA_SC_LINE_STIPPLE);
+    LINK_STATES(VGT_OUTPUT_PATH_CNTL);
+
+    LINK_STATES(VGT_GS_MODE);
+
+    LINK_STATES(PA_SC_MPASS_PS_CNTL);
+    LINK_STATES(PA_SC_MODE_CNTL);
+
+    LINK_STATES(VGT_PRIMITIVEID_EN);
+    LINK_STATES(VGT_DMA_NUM_INSTANCES);
+
+    LINK_STATES(VGT_MULTI_PRIM_IB_RESET_EN);
+
+    LINK_STATES(VGT_INSTANCE_STEP_RATE_0);
+    LINK_STATES(VGT_INSTANCE_STEP_RATE_1);
+
+    LINK_STATES(VGT_STRMOUT_EN);
+    LINK_STATES(VGT_REUSE_OFF);
+
+    LINK_STATES(PA_SC_LINE_CNTL);
+    LINK_STATES(PA_SC_AA_CONFIG);
+    LINK_STATES(PA_SU_VTX_CNTL);
+    LINK_STATES(PA_CL_GB_VERT_CLIP_ADJ);
+    LINK_STATES(PA_CL_GB_VERT_DISC_ADJ);
+    LINK_STATES(PA_CL_GB_HORZ_CLIP_ADJ);
+    LINK_STATES(PA_CL_GB_HORZ_DISC_ADJ);
+    LINK_STATES(PA_SC_AA_SAMPLE_LOCS_MCTX);
+    LINK_STATES(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX);
+
+    LINK_STATES(CB_CLRCMP_CONTROL);
+    LINK_STATES(CB_CLRCMP_SRC);
+    LINK_STATES(CB_CLRCMP_DST);
+    LINK_STATES(CB_CLRCMP_MSK);
+
+    LINK_STATES(PA_SC_AA_MASK);
+
+    LINK_STATES(VGT_VERTEX_REUSE_BLOCK_CNTL);
+    LINK_STATES(VGT_OUT_DEALLOC_CNTL);
+
+    LINK_STATES(DB_RENDER_CONTROL);
+    LINK_STATES(DB_RENDER_OVERRIDE);
+
+    LINK_STATES(DB_HTILE_SURFACE);
+
+    LINK_STATES(DB_ALPHA_TO_MASK);
+
+    LINK_STATES(PA_SU_POLY_OFFSET_DB_FMT_CNTL);
+    LINK_STATES(PA_SU_POLY_OFFSET_CLAMP);
+    LINK_STATES(PA_SU_POLY_OFFSET_FRONT_SCALE);
+    LINK_STATES(PA_SU_POLY_OFFSET_FRONT_OFFSET);
+    LINK_STATES(PA_SU_POLY_OFFSET_BACK_SCALE);
+
+    /* Last entry is written by hand so that it terminates the list instead
+     * of linking to the next (unused) array slot. */
+    pStateListWork->puiValue = (unsigned int*)&(r700->PA_SU_POLY_OFFSET_BACK_OFFSET);
+    pStateListWork->unOffset = mmPA_SU_POLY_OFFSET_BACK_OFFSET - ASIC_CONTEXT_BASE_INDEX;
+    pStateListWork->pNext    = NULL;  /* END OF STATE LIST */
+
+    /* TODO : may need order sorting in case someone breaks the order of states in R700_CHIP_CONTEXT. */
+
+    return GL_TRUE;
+}
+
+/*
+ * Emit the context register state list to the command buffer.
+ *
+ * The emission code is currently compiled out (#if 0), so this function does
+ * nothing and always returns GL_TRUE.
+ *
+ * The disabled code walks r700->pStateList, groups entries whose unOffset
+ * differs from the previous entry by at most 1 into a run, and emits each run
+ * as one IT_SET_CONTEXT_REG packet: the run's first offset followed by the
+ * run's values.
+ */
+GLboolean r700SendContextStates(context_t *context)
+{
+#if 0 /* to be enabled */
+    R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+
+    ContextState *pRunHead = r700->pStateList;
+
+    while(pRunHead != NULL)
+    {
+        ContextState *pRunTail  = pRunHead;
+        ContextState *pEmit     = pRunHead;
+        unsigned int  runLength = 1;
+        unsigned int  idx;
+
+        /* grow the run while the next register offset is adjacent */
+        while(pRunTail->pNext != NULL
+              && (pRunTail->pNext->unOffset - pRunTail->unOffset) <= 1)
+        {
+            pRunTail = pRunTail->pNext;
+            runLength++;
+        }
+
+        /* the next outer iteration starts right after this run */
+        pRunHead = pRunTail->pNext;
+
+        R700_CMDBUF_CHECK_SPACE(runLength + 2);
+        R700EP3(context, IT_SET_CONTEXT_REG, runLength);
+        R700E32(context, pEmit->unOffset);
+
+        for(idx = 0; idx < runLength; idx++)
+        {
+            R700E32(context, *(pEmit->puiValue));
+            pEmit = pEmit->pNext;
+        }
+    }
+#endif /* to be enabled */
+    return GL_TRUE;
+}
+
+
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h
new file mode 100644 (file)
index 0000000..f081900
--- /dev/null
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_CHIP_H_
+#define _R700_CHIP_H_
+
+#include "r600_context.h"
+
+#include "r600_reg.h"
+#include "r600_reg_auto_r6xx.h"
+#include "r600_reg_r6xx.h"
+#include "r600_reg_r7xx.h"
+
+#include "r700_chipoffset.h"
+
+/*
+ * Register bit-field helpers; `x` is an lvalue (typically a u32All member).
+ *
+ * SETfield   - replace the bits selected by `mask` with `val` shifted left by
+ *              `shift`. The shifted value is masked, so a value wider than
+ *              the field is truncated instead of spilling into neighbouring
+ *              fields.
+ * CLEARfield - clear every bit selected by `mask`.
+ * SETbit     - set the bit(s) in `bit` (a bit mask, not a bit index).
+ * CLEARbit   - clear the bit(s) in `bit` (a bit mask, not a bit index).
+ *
+ * `x` is evaluated more than once by SETfield; do not pass an expression
+ * with side effects.
+ */
+#define SETfield(x, val, shift, mask)  ( (x) = ((x) & ~(mask)) | (((val) << (shift)) & (mask)) ) /* u32All */
+#define CLEARfield(x, mask)            ( (x) &= ~(mask) )
+#define SETbit(x, bit)                 ( (x) |= (bit) )
+#define CLEARbit(x, bit)               ( (x) &= ~(bit) )
+
+/* Length of the textures[] and samplers[] arrays in R700_TEXTURE_STATES. */
+#define R700_TEXTURE_NUMBERUNITS 16
+
+/* Constants and enums that are not provided by the r600_*.h register headers */
+
+/* NOTE(review): presumably the number of words per fetch resource (it matches
+ * the seven SQ_TEX_RESOURCE0..6 words of TEXTURE_STATE_STRUCT) - confirm. */
+#define FETCH_RESOURCE_STRIDE 7
+
+/* Base register indices. ASIC_CONTEXT_BASE_INDEX is subtracted from the
+ * mm<REG> index by LINK_STATES in r700_chip.c to get a context-relative
+ * offset; use of the other two bases is not visible in this file. */
+#define ASIC_CONFIG_BASE_INDEX    0x2000
+#define ASIC_CONTEXT_BASE_INDEX   0xA000
+#define ASIC_CTL_CONST_BASE_INDEX 0xF3FC
+
+/* SQ absolute/relative selector values.
+ * NOTE(review): exact hardware field not visible here - confirm. */
+enum
+{
+    SQ_ABSOLUTE = 0x0,
+    SQ_RELATIVE = 0x1,
+};
+
+/* SQ_ALU_SCL_* selector values (four encodings, 0..3).
+ * NOTE(review): meaning of the 210/122/212/221 suffixes is not visible here. */
+enum
+{
+    SQ_ALU_SCL_210 = 0x0,
+    SQ_ALU_SCL_122 = 0x1,
+    SQ_ALU_SCL_212 = 0x2,
+    SQ_ALU_SCL_221 = 0x3,
+};
+
+/* Texture coordinate normalisation selector values.
+ * NOTE(review): presumably used for the texture-fetch coordinate type - confirm. */
+enum
+{
+    SQ_TEX_UNNORMALIZED = 0x0,
+    SQ_TEX_NORMALIZED   = 0x1,
+};
+
+/* SQ_CF_PIXEL_* values: MRT0..MRT7 are 0..7 and Z is 0x3d.
+ * NOTE(review): presumably pixel export target indices - confirm. */
+enum
+{
+    SQ_CF_PIXEL_MRT0 = 0x00,
+    SQ_CF_PIXEL_MRT1 = 0x01,
+    SQ_CF_PIXEL_MRT2 = 0x02,
+    SQ_CF_PIXEL_MRT3 = 0x03,
+    SQ_CF_PIXEL_MRT4 = 0x04,
+    SQ_CF_PIXEL_MRT5 = 0x05,
+    SQ_CF_PIXEL_MRT6 = 0x06,
+    SQ_CF_PIXEL_MRT7 = 0x07,
+    SQ_CF_PIXEL_Z    = 0x3d,
+};
+
+/* SQ_CF_POS_0..3 values (0x3c..0x3f).
+ * NOTE(review): per the type name, presumably array-base values for position
+ * exports - confirm. */
+typedef enum ENUM_SQ_CF_ARRAY_BASE_POS
+{
+    SQ_CF_POS_0 = 0x3c,
+    SQ_CF_POS_1 = 0x3d,
+    SQ_CF_POS_2 = 0x3e,
+    SQ_CF_POS_3 = 0x3f,
+} ENUM_SQ_CF_ARRAY_BASE_POS;
+
+/* Value 23 for PRIME_CACHE_ON_DRAW.
+ * NOTE(review): the `_bit` suffix suggests a bit index in a PGM_RESOURCES
+ * register rather than a mask - confirm before use with SETbit/CLEARbit. */
+enum
+{
+    PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit = 0x17,
+};
+
+/* Texture filter selector values. The XY-filter and mip-filter groups share
+ * one enum and both start at 0, so values overlap between the two groups. */
+enum
+{
+    /* XY filter */
+    TEX_XYFilter_Point     = 0x0,
+    TEX_XYFilter_Linear    = 0x1,
+    TEX_XYFilter_Cubic     = 0x2,
+    TEX_XYFilter_Cleartype = 0x3,
+
+    /* mip filter */
+    TEX_MipFilter_None     = 0x0,
+    TEX_MipFilter_Point    = 0x1,
+    TEX_MipFilter_Linear   = 0x2,
+};
+
+/* SQ export write-type values: bit 0 distinguishes the _IND variants and
+ * bit 1 the _ACK variants (as the encodings 0..3 show). */
+enum
+{
+    SQ_EXPORT_WRITE         = 0x0,
+    SQ_EXPORT_WRITE_IND     = 0x1,
+    SQ_EXPORT_WRITE_ACK     = 0x2,
+    SQ_EXPORT_WRITE_IND_ACK = 0x3,
+};
+
+/* --------------------------------- */
+
+/*
+ * PM4 packet header bases: the packet type (0..3) sits in bits 31:30.
+ *
+ * These are unsigned macros rather than enumerators because 0x80000000 and
+ * 0xC0000000 are not representable as `int`, which ISO C (before C23)
+ * requires of enumeration constants.
+ */
+#define R700_PM4_PACKET0_NOP 0x00000000u
+#define R700_PM4_PACKET1_NOP 0x40000000u
+#define R700_PM4_PACKET2_NOP 0x80000000u
+#define R700_PM4_PACKET3_NOP 0xC0000000u
+
+/*
+ * PM4 type-3 packet header words: the IT_* opcode shifted left by 8 and
+ * OR'ed into R700_PM4_PACKET3_NOP.
+ */
+#define  R700_PM4_TYPE3_HEADER(it_opcode)  (R700_PM4_PACKET3_NOP | (it_opcode << 8))
+
+#define  PM4_OPCODE_SET_INDEX_TYPE      R700_PM4_TYPE3_HEADER(IT_INDEX_TYPE)
+
+#define  PM4_OPCODE_DRAW_INDEX_AUTO     R700_PM4_TYPE3_HEADER(IT_DRAW_INDEX_AUTO)
+#define  PM4_OPCODE_DRAW_INDEX_IMMD     R700_PM4_TYPE3_HEADER(IT_DRAW_INDEX_IMMD)
+#define  PM4_OPCODE_WAIT_REG_MEM        R700_PM4_TYPE3_HEADER(IT_WAIT_REG_MEM)
+#define  PM4_OPCODE_SET_CONTEXT_REG     R700_PM4_TYPE3_HEADER(IT_SET_CONTEXT_REG)
+#define  PM4_OPCODE_SET_CONFIG_REG      R700_PM4_TYPE3_HEADER(IT_SET_CONFIG_REG)
+#define  PM4_OPCODE_SET_ALU_CONST       R700_PM4_TYPE3_HEADER(IT_SET_ALU_CONST)
+#define  PM4_OPCODE_SET_RESOURCE        R700_PM4_TYPE3_HEADER(IT_SET_RESOURCE)
+#define  PM4_OPCODE_SET_SAMPLER         R700_PM4_TYPE3_HEADER(IT_SET_SAMPLER)
+#define  PM4_OPCODE_CONTEXT_CONTROL     R700_PM4_TYPE3_HEADER(IT_CONTEXT_CONTROL)
+
+/* One register-sized value, viewable either as raw bits (u32All) or as a
+ * float (f32All) sharing the same storage. */
+union UINT_FLOAT
+{
+    unsigned int u32All;  /* raw bit pattern */
+    float        f32All;  /* same storage read as float */
+};
+
+/* Shadow of one texture resource: seven SQ_TEX_RESOURCE words plus an
+ * enable flag. */
+typedef struct _TEXTURE_STATE_STRUCT
+{
+    union UINT_FLOAT SQ_TEX_RESOURCE0;
+    union UINT_FLOAT SQ_TEX_RESOURCE1;
+    union UINT_FLOAT SQ_TEX_RESOURCE2;
+    union UINT_FLOAT SQ_TEX_RESOURCE3;
+    union UINT_FLOAT SQ_TEX_RESOURCE4;
+    union UINT_FLOAT SQ_TEX_RESOURCE5;
+    union UINT_FLOAT SQ_TEX_RESOURCE6;
+    GLboolean        enabled;
+} TEXTURE_STATE_STRUCT;
+
+/* Shadow of one texture sampler: three SQ_TEX_SAMPLER words plus an
+ * enable flag. */
+typedef struct _SAMPLER_STATE_STRUCT
+{
+    union UINT_FLOAT SQ_TEX_SAMPLER0;
+    union UINT_FLOAT SQ_TEX_SAMPLER1;
+    union UINT_FLOAT SQ_TEX_SAMPLER2;
+    GLboolean        enabled;
+} SAMPLER_STATE_STRUCT;
+
+/* Per-unit texture and sampler state pointers, R700_TEXTURE_NUMBERUNITS
+ * slots each. Ownership of the pointed-to objects is not visible here. */
+typedef struct _R700_TEXTURE_STATES
+{
+    TEXTURE_STATE_STRUCT * textures[R700_TEXTURE_NUMBERUNITS];
+    SAMPLER_STATE_STRUCT * samplers[R700_TEXTURE_NUMBERUNITS];
+} R700_TEXTURE_STATES;
+
+/* Node of the singly linked context-register state list. */
+typedef struct ContextState
+{
+    unsigned int        *puiValue;  /* points at the shadowed register value */
+    unsigned int         unOffset;  /* register index relative to ASIC_CONTEXT_BASE_INDEX */
+    struct ContextState *pNext;     /* next node, NULL at the end of the list */
+} ContextState;
+
+typedef struct _R700_CHIP_CONTEXT
+{
+       union UINT_FLOAT                DB_DEPTH_SIZE             ;  /* 0xA000 */
+       union UINT_FLOAT                DB_DEPTH_VIEW             ;  /* 0xA001 */
+       
+       union UINT_FLOAT                DB_DEPTH_BASE             ;  /* 0xA003 */
+       union UINT_FLOAT                DB_DEPTH_INFO             ;  /* 0xA004 */
+    union UINT_FLOAT                DB_HTILE_DATA_BASE        ;  /* 0xA005 */
+       
+    union UINT_FLOAT               DB_STENCIL_CLEAR          ;  /* 0xA00A */
+       union UINT_FLOAT                DB_DEPTH_CLEAR            ;  /* 0xA00B */
+       
+    union UINT_FLOAT                       PA_SC_SCREEN_SCISSOR_TL   ;  /* 0xA00C */
+       union UINT_FLOAT                    PA_SC_SCREEN_SCISSOR_BR   ;  /* 0xA00D */
+       
+       union UINT_FLOAT                CB_COLOR0_BASE            ;  /* 0xA010 */
+       
+       union UINT_FLOAT                CB_COLOR0_SIZE            ;  /* 0xA018 */
+       
+       union UINT_FLOAT                CB_COLOR0_VIEW            ;  /* 0xA020 */
+       
+       union UINT_FLOAT                CB_COLOR0_INFO            ;  /* 0xA028 */
+    union UINT_FLOAT                   CB_COLOR1_INFO            ;  /* 0xA029 */
+       union UINT_FLOAT                CB_COLOR2_INFO            ;  /* 0xA02A */
+       union UINT_FLOAT                CB_COLOR3_INFO            ;  /* 0xA02B */
+       union UINT_FLOAT                CB_COLOR4_INFO            ;  /* 0xA02C */
+       union UINT_FLOAT                CB_COLOR5_INFO            ;  /* 0xA02D */
+       union UINT_FLOAT                CB_COLOR6_INFO            ;  /* 0xA02E */
+       union UINT_FLOAT                CB_COLOR7_INFO            ;  /* 0xA02F */
+       
+       union UINT_FLOAT                CB_COLOR0_TILE            ;  /* 0xA030 */
+       
+       union UINT_FLOAT                CB_COLOR0_FRAG            ;  /* 0xA038 */
+       
+       union UINT_FLOAT                CB_COLOR0_MASK            ;  /* 0xA040 */
+               
+    union UINT_FLOAT                   PA_SC_WINDOW_OFFSET       ;  /* 0xA080 */
+       union UINT_FLOAT                    PA_SC_WINDOW_SCISSOR_TL   ;  /* 0xA081 */
+       union UINT_FLOAT                    PA_SC_WINDOW_SCISSOR_BR   ;  /* 0xA082 */
+       union UINT_FLOAT                PA_SC_CLIPRECT_RULE       ;  /* 0xA083 */
+       union UINT_FLOAT                PA_SC_CLIPRECT_0_TL       ;  /* 0xA084 */
+       union UINT_FLOAT                PA_SC_CLIPRECT_0_BR       ;  /* 0xA085 */
+       union UINT_FLOAT                PA_SC_CLIPRECT_1_TL       ;  /* 0xA086 */
+       union UINT_FLOAT                PA_SC_CLIPRECT_1_BR       ;  /* 0xA087 */
+       union UINT_FLOAT                PA_SC_CLIPRECT_2_TL       ;  /* 0xA088 */
+       union UINT_FLOAT                PA_SC_CLIPRECT_2_BR       ;  /* 0xA089 */
+       union UINT_FLOAT                PA_SC_CLIPRECT_3_TL       ;  /* 0xA08A */
+       union UINT_FLOAT                PA_SC_CLIPRECT_3_BR       ;  /* 0xA08B */
+
+       union UINT_FLOAT                PA_SC_EDGERULE            ;  /* 0xA08C */
+
+       union UINT_FLOAT                CB_TARGET_MASK            ;  /* 0xA08E */
+       union UINT_FLOAT                CB_SHADER_MASK            ;  /* 0xA08F */
+       union UINT_FLOAT        PA_SC_GENERIC_SCISSOR_TL  ;  /* 0xA090 */
+       union UINT_FLOAT        PA_SC_GENERIC_SCISSOR_BR  ;  /* 0xA091 */
+       
+       union UINT_FLOAT        PA_SC_VPORT_SCISSOR_0_TL  ;  /* 0xA094 */
+       union UINT_FLOAT        PA_SC_VPORT_SCISSOR_0_BR  ;  /* 0xA095 */
+       union UINT_FLOAT        PA_SC_VPORT_SCISSOR_1_TL  ;  /* 0xA096 */
+       union UINT_FLOAT        PA_SC_VPORT_SCISSOR_1_BR  ;  /* 0xA097 */
+       
+    union UINT_FLOAT           PA_SC_VPORT_ZMIN_0        ;  /* 0xA0B4 */
+       union UINT_FLOAT                PA_SC_VPORT_ZMAX_0        ;  /* 0xA0B5 */
+       
+       union UINT_FLOAT                        SX_MISC                   ;  /* 0xA0D4 */
+
+    union UINT_FLOAT           SQ_VTX_SEMANTIC_0         ;  /* 0xA0E0 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_1         ;  /* 0xA0E1 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_2         ;  /* 0xA0E2 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_3         ;  /* 0xA0E3 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_4         ;  /* 0xA0E4 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_5         ;  /* 0xA0E5 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_6         ;  /* 0xA0E6 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_7         ;  /* 0xA0E7 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_8         ;  /* 0xA0E8 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_9         ;  /* 0xA0E9 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_10        ;  /* 0xA0EA */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_11        ;  /* 0xA0EB */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_12        ;  /* 0xA0EC */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_13        ;  /* 0xA0ED */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_14        ;  /* 0xA0EE */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_15        ;  /* 0xA0EF */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_16        ;  /* 0xA0F0 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_17        ;  /* 0xA0F1 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_18        ;  /* 0xA0F2 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_19        ;  /* 0xA0F3 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_20        ;  /* 0xA0F4 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_21        ;  /* 0xA0F5 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_22        ;  /* 0xA0F6 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_23        ;  /* 0xA0F7 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_24        ;  /* 0xA0F8 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_25        ;  /* 0xA0F9 */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_26        ;  /* 0xA0FA */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_27        ;  /* 0xA0FB */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_28        ;  /* 0xA0FC */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_29        ;  /* 0xA0FD */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_30        ;  /* 0xA0FE */
+       union UINT_FLOAT                SQ_VTX_SEMANTIC_31        ;  /* 0xA0FF */
+
+       union UINT_FLOAT                VGT_MAX_VTX_INDX          ;  /* 0xA100 */
+       union UINT_FLOAT                VGT_MIN_VTX_INDX          ;  /* 0xA101 */
+       union UINT_FLOAT                VGT_INDX_OFFSET           ;  /* 0xA102 */
+       union UINT_FLOAT  VGT_MULTI_PRIM_IB_RESET_INDX;  /* 0xA103 */
+       union UINT_FLOAT        SX_ALPHA_TEST_CONTROL     ;  /* 0xA104 */
+
+    union UINT_FLOAT                   CB_BLEND_RED              ;  /* 0xA105 */
+       union UINT_FLOAT                CB_BLEND_GREEN            ;  /* 0xA106 */
+       union UINT_FLOAT                CB_BLEND_BLUE             ;  /* 0xA107 */
+       union UINT_FLOAT                CB_BLEND_ALPHA            ;  /* 0xA108 */
+       
+       union UINT_FLOAT                PA_CL_VPORT_XSCALE        ;  /* 0xA10F */
+       union UINT_FLOAT        PA_CL_VPORT_XOFFSET       ;  /* 0xA110 */
+       union UINT_FLOAT                PA_CL_VPORT_YSCALE        ;  /* 0xA111 */
+       union UINT_FLOAT        PA_CL_VPORT_YOFFSET       ;  /* 0xA112 */
+       union UINT_FLOAT                PA_CL_VPORT_ZSCALE        ;  /* 0xA113 */
+       union UINT_FLOAT        PA_CL_VPORT_ZOFFSET       ;  /* 0xA114 */
+       
+       union UINT_FLOAT                SPI_VS_OUT_ID_0           ;  /* 0xA185 */
+       union UINT_FLOAT                SPI_VS_OUT_ID_1           ;  /* 0xA186 */
+    union UINT_FLOAT                   SPI_VS_OUT_ID_2           ;  /* 0xA187 */
+       union UINT_FLOAT                SPI_VS_OUT_ID_3           ;  /* 0xA188 */
+       union UINT_FLOAT                SPI_VS_OUT_ID_4           ;  /* 0xA189 */
+       union UINT_FLOAT                SPI_VS_OUT_ID_5           ;  /* 0xA18A */
+       union UINT_FLOAT                SPI_VS_OUT_ID_6           ;  /* 0xA18B */
+       union UINT_FLOAT                SPI_VS_OUT_ID_7           ;  /* 0xA18C */
+       union UINT_FLOAT                SPI_VS_OUT_ID_8           ;  /* 0xA18D */
+       union UINT_FLOAT                SPI_VS_OUT_ID_9           ;  /* 0xA18E */
+       
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_0       ;  /* 0xA191 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_1       ;  /* 0xA192 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_2       ;  /* 0xA193 */
+    union UINT_FLOAT           SPI_PS_INPUT_CNTL_3       ;  /* 0xA194 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_4       ;  /* 0xA195 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_5       ;  /* 0xA196 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_6       ;  /* 0xA197 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_7       ;  /* 0xA198 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_8       ;  /* 0xA199 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_9       ;  /* 0xA19A */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_10      ;  /* 0xA19B */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_11      ;  /* 0xA19C */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_12      ;  /* 0xA19D */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_13      ;  /* 0xA19E */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_14      ;  /* 0xA19F */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_15      ;  /* 0xA1A0 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_16      ;  /* 0xA1A1 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_17      ;  /* 0xA1A2 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_18      ;  /* 0xA1A3 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_19      ;  /* 0xA1A4 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_20      ;  /* 0xA1A5 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_21      ;  /* 0xA1A6 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_22      ;  /* 0xA1A7 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_23      ;  /* 0xA1A8 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_24      ;  /* 0xA1A9 */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_25      ;  /* 0xA1AA */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_26      ;  /* 0xA1AB */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_27      ;  /* 0xA1AC */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_28      ;  /* 0xA1AD */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_29      ;  /* 0xA1AE */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_30      ;  /* 0xA1AF */
+       union UINT_FLOAT        SPI_PS_INPUT_CNTL_31      ;  /* 0xA1B0 */
+       union UINT_FLOAT             SPI_VS_OUT_CONFIG         ;  /* 0xA1B1 */
+    union UINT_FLOAT           SPI_THREAD_GROUPING       ;  /* 0xA1B2 */
+       union UINT_FLOAT        SPI_PS_IN_CONTROL_0       ;  /* 0xA1B3 */
+       union UINT_FLOAT        SPI_PS_IN_CONTROL_1       ;  /* 0xA1B4 */
+
+       union UINT_FLOAT                SPI_INPUT_Z               ;  /* 0xA1B6 */
+    union UINT_FLOAT                   SPI_FOG_CNTL              ;  /* 0xA1B7 */
+       
+       union UINT_FLOAT                CB_BLEND0_CONTROL         ;  /* 0xA1E0 */
+
+    union UINT_FLOAT           CB_SHADER_CONTROL         ;  /* 0xA1E8 */
+       
+       /*union UINT_FLOAT              VGT_DRAW_INITIATOR*/        ;  /* 0xA1FC */
+       
+       union UINT_FLOAT                DB_DEPTH_CONTROL          ;  /* 0xA200 */
+       
+       union UINT_FLOAT                CB_COLOR_CONTROL          ;  /* 0xA202 */
+       union UINT_FLOAT                DB_SHADER_CONTROL         ;  /* 0xA203 */
+       union UINT_FLOAT                PA_CL_CLIP_CNTL           ;  /* 0xA204 */
+       union UINT_FLOAT                PA_SU_SC_MODE_CNTL        ;  /* 0xA205 */
+       union UINT_FLOAT                PA_CL_VTE_CNTL            ;  /* 0xA206 */
+    union UINT_FLOAT           PA_CL_VS_OUT_CNTL         ;  /* 0xA207 */
+    union UINT_FLOAT           PA_CL_NANINF_CNTL         ;  /* 0xA208 */
+       
+       union UINT_FLOAT                SQ_PGM_START_PS           ;  /* 0xA210 */
+       union UINT_FLOAT        SQ_PGM_RESOURCES_PS       ;  /* 0xA214 */
+       union UINT_FLOAT                SQ_PGM_EXPORTS_PS         ;  /* 0xA215 */
+       union UINT_FLOAT                SQ_PGM_START_VS           ;  /* 0xA216 */
+       union UINT_FLOAT                        SQ_PGM_RESOURCES_VS       ;  /* 0xA21A */
+    union UINT_FLOAT                   SQ_PGM_START_GS           ;  /* 0xA21B */
+       union UINT_FLOAT        SQ_PGM_RESOURCES_GS       ;  /* 0xA21F */
+       union UINT_FLOAT                SQ_PGM_START_ES           ;  /* 0xA220 */
+       union UINT_FLOAT        SQ_PGM_RESOURCES_ES       ;  /* 0xA224 */
+       union UINT_FLOAT                SQ_PGM_START_FS           ;  /* 0xA225 */
+       union UINT_FLOAT        SQ_PGM_RESOURCES_FS       ;  /* 0xA229 */
+       union UINT_FLOAT        SQ_ESGS_RING_ITEMSIZE     ;  /* 0xA22A */
+       union UINT_FLOAT        SQ_GSVS_RING_ITEMSIZE     ;  /* 0xA22B */
+       union UINT_FLOAT        SQ_ESTMP_RING_ITEMSIZE    ;  /* 0xA22C */
+       union UINT_FLOAT        SQ_GSTMP_RING_ITEMSIZE    ;  /* 0xA22D */
+       union UINT_FLOAT        SQ_VSTMP_RING_ITEMSIZE    ;  /* 0xA22E */
+       union UINT_FLOAT        SQ_PSTMP_RING_ITEMSIZE    ;  /* 0xA22F */
+       union UINT_FLOAT        SQ_FBUF_RING_ITEMSIZE     ;  /* 0xA230 */
+       union UINT_FLOAT        SQ_REDUC_RING_ITEMSIZE    ;  /* 0xA231 */
+       union UINT_FLOAT        SQ_GS_VERT_ITEMSIZE       ;  /* 0xA232 */
+       union UINT_FLOAT        SQ_PGM_CF_OFFSET_PS       ;  /* 0xA233 */
+    union UINT_FLOAT           SQ_PGM_CF_OFFSET_VS       ;  /* 0xA234 */
+       union UINT_FLOAT        SQ_PGM_CF_OFFSET_GS       ;  /* 0xA235 */
+       union UINT_FLOAT        SQ_PGM_CF_OFFSET_ES       ;  /* 0xA236 */
+       union UINT_FLOAT        SQ_PGM_CF_OFFSET_FS       ;  /* 0xA237 */
+               
+       union UINT_FLOAT                PA_SU_POINT_SIZE          ;  /* 0xA280 */
+       union UINT_FLOAT                PA_SU_POINT_MINMAX        ;  /* 0xA281 */
+       union UINT_FLOAT                PA_SU_LINE_CNTL           ;  /* 0xA282 */
+       union UINT_FLOAT                PA_SC_LINE_STIPPLE        ;  /* 0xA283 */
+    union UINT_FLOAT           VGT_OUTPUT_PATH_CNTL      ;  /* 0xA284 */
+
+    union UINT_FLOAT                   VGT_GS_MODE               ;  /* 0xA290 */
+       
+    union UINT_FLOAT           PA_SC_MPASS_PS_CNTL       ;  /* 0xA292 */
+       union UINT_FLOAT                PA_SC_MODE_CNTL           ;  /* 0xA293 */
+       
+    union UINT_FLOAT           VGT_PRIMITIVEID_EN        ;  /* 0xA2A1 */
+       union UINT_FLOAT        VGT_DMA_NUM_INSTANCES     ;  /* 0xA2A2 */
+       
+       union UINT_FLOAT        VGT_MULTI_PRIM_IB_RESET_EN;  /* 0xA2A5 */
+
+    union UINT_FLOAT   VGT_INSTANCE_STEP_RATE_0  ;  /* 0xA2A8 */
+       union UINT_FLOAT        VGT_INSTANCE_STEP_RATE_1  ;  /* 0xA2A9 */
+       
+       union UINT_FLOAT                VGT_STRMOUT_EN            ;  /* 0xA2AC */
+       union UINT_FLOAT                VGT_REUSE_OFF             ;  /* 0xA2AD */
+       
+       union UINT_FLOAT                PA_SC_LINE_CNTL           ;  /* 0xA300 */
+       union UINT_FLOAT                PA_SC_AA_CONFIG           ;  /* 0xA301 */
+       union UINT_FLOAT                PA_SU_VTX_CNTL            ;  /* 0xA302 */
+       union UINT_FLOAT        PA_CL_GB_VERT_CLIP_ADJ    ;  /* 0xA303 */
+       union UINT_FLOAT        PA_CL_GB_VERT_DISC_ADJ    ;  /* 0xA304 */
+       union UINT_FLOAT        PA_CL_GB_HORZ_CLIP_ADJ    ;  /* 0xA305 */
+       union UINT_FLOAT        PA_CL_GB_HORZ_DISC_ADJ    ;  /* 0xA306 */
+    union UINT_FLOAT   PA_SC_AA_SAMPLE_LOCS_MCTX ;  /* 0xA307 */
+       union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX; /* 0xA308 */
+
+       union UINT_FLOAT                CB_CLRCMP_CONTROL         ;  /* 0xA30C */
+       union UINT_FLOAT                CB_CLRCMP_SRC             ;  /* 0xA30D */
+       union UINT_FLOAT                CB_CLRCMP_DST             ;  /* 0xA30E */
+       union UINT_FLOAT                CB_CLRCMP_MSK             ;  /* 0xA30F */
+       
+       union UINT_FLOAT                PA_SC_AA_MASK             ;  /* 0xA312 */
+       
+       union UINT_FLOAT   VGT_VERTEX_REUSE_BLOCK_CNTL;  /* 0xA316 */
+       union UINT_FLOAT        VGT_OUT_DEALLOC_CNTL      ;  /* 0xA317 */
+       
+       union UINT_FLOAT                DB_RENDER_CONTROL         ;  /* 0xA343 */
+       union UINT_FLOAT                DB_RENDER_OVERRIDE        ;  /* 0xA344 */
+
+    union UINT_FLOAT           DB_HTILE_SURFACE          ;  /* 0xA349 */
+
+    union UINT_FLOAT           DB_ALPHA_TO_MASK          ;  /* 0xA351 */
+
+    union UINT_FLOAT PA_SU_POLY_OFFSET_DB_FMT_CNTL;   /* 0xA37E */
+       union UINT_FLOAT        PA_SU_POLY_OFFSET_CLAMP   ;      /* 0xA37F */
+       union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_SCALE;   /* 0xA380 */
+       union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_OFFSET; /* 0xA381 */
+       union UINT_FLOAT  PA_SU_POLY_OFFSET_BACK_SCALE;    /* 0xA382 */
+       union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_OFFSET;   /* 0xA383 */
+
+    ContextState * pStateList;
+
+    R700_TEXTURE_STATES texture_states;
+       
+} R700_CHIP_CONTEXT;
+/* Yields context->chipobj.pvChipObj cast to R700_CHIP_CONTEXT *; the argument is parenthesized so any pointer expression may be passed. */
+#define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)((context)->chipobj.pvChipObj))
+/* Chip-object entry points: each takes the driver context and returns a GLboolean. Defined outside this header (presumably r700_chip.c -- confirm). */
+extern GLboolean r700InitChipObject(context_t *context);
+extern GLboolean r700SendContextStates(context_t *context);
+
+#endif /* _R700_CHIP_H_ */
+
diff --git a/src/mesa/drivers/dri/r600/r700_chipoffset.h b/src/mesa/drivers/dri/r600/r700_chipoffset.h
new file mode 100644 (file)
index 0000000..9050b9a
--- /dev/null
@@ -0,0 +1,684 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_CHIPOFFSET_H_
+#define _R700_CHIPOFFSET_H_
+/* WAIT_UNTIL and SCRATCH_REG0-7 (0x2140-0x2147); every mmGUI_SCRATCH_REGn repeats the value of mmSCRATCH_REGn. */
+#define mmWAIT_UNTIL                                    0x2010
+#define mmSCRATCH_REG0                                  0x2140
+#define mmGUI_SCRATCH_REG0                              0x2140
+#define mmSCRATCH_REG1                                  0x2141
+#define mmGUI_SCRATCH_REG1                              0x2141
+#define mmSCRATCH_REG2                                  0x2142
+#define mmGUI_SCRATCH_REG2                              0x2142
+#define mmSCRATCH_REG3                                  0x2143
+#define mmGUI_SCRATCH_REG3                              0x2143
+#define mmSCRATCH_REG4                                  0x2144
+#define mmGUI_SCRATCH_REG4                              0x2144
+#define mmSCRATCH_REG5                                  0x2145
+#define mmGUI_SCRATCH_REG5                              0x2145
+#define mmSCRATCH_REG6                                  0x2146
+#define mmGUI_SCRATCH_REG6                              0x2146
+#define mmSCRATCH_REG7                                  0x2147
+#define mmGUI_SCRATCH_REG7                              0x2147
+/* CP_COHER_{CNTL,SIZE,BASE,STATUS}: four consecutive offsets, 0x217C-0x217F. */
+#define mmCP_COHER_CNTL                                 0x217C
+#define mmCP_COHER_SIZE                                 0x217D
+#define mmCP_COHER_BASE                                 0x217E
+#define mmCP_COHER_STATUS                               0x217F
+/* PA_CL_*: the six VPORT registers for viewport 0 sit at 0xA10F-0xA114 and repeat every 6 offsets for viewports 1-15; then clip controls, UCP_0-5 (X,Y,Z,W) and POINT_* registers. */
+#define mmPA_CL_VPORT_XSCALE                            0xA10F
+#define mmPA_CL_VPORT_XOFFSET                           0xA110
+#define mmPA_CL_VPORT_YSCALE                            0xA111
+#define mmPA_CL_VPORT_YOFFSET                           0xA112
+#define mmPA_CL_VPORT_ZSCALE                            0xA113
+#define mmPA_CL_VPORT_ZOFFSET                           0xA114
+#define mmPA_CL_VPORT_XSCALE_1                          0xA115
+#define mmPA_CL_VPORT_XSCALE_2                          0xA11B
+#define mmPA_CL_VPORT_XSCALE_3                          0xA121
+#define mmPA_CL_VPORT_XSCALE_4                          0xA127
+#define mmPA_CL_VPORT_XSCALE_5                          0xA12D
+#define mmPA_CL_VPORT_XSCALE_6                          0xA133
+#define mmPA_CL_VPORT_XSCALE_7                          0xA139
+#define mmPA_CL_VPORT_XSCALE_8                          0xA13F
+#define mmPA_CL_VPORT_XSCALE_9                          0xA145
+#define mmPA_CL_VPORT_XSCALE_10                         0xA14B
+#define mmPA_CL_VPORT_XSCALE_11                         0xA151
+#define mmPA_CL_VPORT_XSCALE_12                         0xA157
+#define mmPA_CL_VPORT_XSCALE_13                         0xA15D
+#define mmPA_CL_VPORT_XSCALE_14                         0xA163
+#define mmPA_CL_VPORT_XSCALE_15                         0xA169
+#define mmPA_CL_VPORT_XOFFSET_1                         0xA116
+#define mmPA_CL_VPORT_XOFFSET_2                         0xA11C
+#define mmPA_CL_VPORT_XOFFSET_3                         0xA122
+#define mmPA_CL_VPORT_XOFFSET_4                         0xA128
+#define mmPA_CL_VPORT_XOFFSET_5                         0xA12E
+#define mmPA_CL_VPORT_XOFFSET_6                         0xA134
+#define mmPA_CL_VPORT_XOFFSET_7                         0xA13A
+#define mmPA_CL_VPORT_XOFFSET_8                         0xA140
+#define mmPA_CL_VPORT_XOFFSET_9                         0xA146
+#define mmPA_CL_VPORT_XOFFSET_10                        0xA14C
+#define mmPA_CL_VPORT_XOFFSET_11                        0xA152
+#define mmPA_CL_VPORT_XOFFSET_12                        0xA158
+#define mmPA_CL_VPORT_XOFFSET_13                        0xA15E
+#define mmPA_CL_VPORT_XOFFSET_14                        0xA164
+#define mmPA_CL_VPORT_XOFFSET_15                        0xA16A
+#define mmPA_CL_VPORT_YSCALE_1                          0xA117
+#define mmPA_CL_VPORT_YSCALE_2                          0xA11D
+#define mmPA_CL_VPORT_YSCALE_3                          0xA123
+#define mmPA_CL_VPORT_YSCALE_4                          0xA129
+#define mmPA_CL_VPORT_YSCALE_5                          0xA12F
+#define mmPA_CL_VPORT_YSCALE_6                          0xA135
+#define mmPA_CL_VPORT_YSCALE_7                          0xA13B
+#define mmPA_CL_VPORT_YSCALE_8                          0xA141
+#define mmPA_CL_VPORT_YSCALE_9                          0xA147
+#define mmPA_CL_VPORT_YSCALE_10                         0xA14D
+#define mmPA_CL_VPORT_YSCALE_11                         0xA153
+#define mmPA_CL_VPORT_YSCALE_12                         0xA159
+#define mmPA_CL_VPORT_YSCALE_13                         0xA15F
+#define mmPA_CL_VPORT_YSCALE_14                         0xA165
+#define mmPA_CL_VPORT_YSCALE_15                         0xA16B
+#define mmPA_CL_VPORT_YOFFSET_1                         0xA118
+#define mmPA_CL_VPORT_YOFFSET_2                         0xA11E
+#define mmPA_CL_VPORT_YOFFSET_3                         0xA124
+#define mmPA_CL_VPORT_YOFFSET_4                         0xA12A
+#define mmPA_CL_VPORT_YOFFSET_5                         0xA130
+#define mmPA_CL_VPORT_YOFFSET_6                         0xA136
+#define mmPA_CL_VPORT_YOFFSET_7                         0xA13C
+#define mmPA_CL_VPORT_YOFFSET_8                         0xA142
+#define mmPA_CL_VPORT_YOFFSET_9                         0xA148
+#define mmPA_CL_VPORT_YOFFSET_10                        0xA14E
+#define mmPA_CL_VPORT_YOFFSET_11                        0xA154
+#define mmPA_CL_VPORT_YOFFSET_12                        0xA15A
+#define mmPA_CL_VPORT_YOFFSET_13                        0xA160
+#define mmPA_CL_VPORT_YOFFSET_14                        0xA166
+#define mmPA_CL_VPORT_YOFFSET_15                        0xA16C
+#define mmPA_CL_VPORT_ZSCALE_1                          0xA119
+#define mmPA_CL_VPORT_ZSCALE_2                          0xA11F
+#define mmPA_CL_VPORT_ZSCALE_3                          0xA125
+#define mmPA_CL_VPORT_ZSCALE_4                          0xA12B
+#define mmPA_CL_VPORT_ZSCALE_5                          0xA131
+#define mmPA_CL_VPORT_ZSCALE_6                          0xA137
+#define mmPA_CL_VPORT_ZSCALE_7                          0xA13D
+#define mmPA_CL_VPORT_ZSCALE_8                          0xA143
+#define mmPA_CL_VPORT_ZSCALE_9                          0xA149
+#define mmPA_CL_VPORT_ZSCALE_10                         0xA14F
+#define mmPA_CL_VPORT_ZSCALE_11                         0xA155
+#define mmPA_CL_VPORT_ZSCALE_12                         0xA15B
+#define mmPA_CL_VPORT_ZSCALE_13                         0xA161
+#define mmPA_CL_VPORT_ZSCALE_14                         0xA167
+#define mmPA_CL_VPORT_ZSCALE_15                         0xA16D
+#define mmPA_CL_VPORT_ZOFFSET_1                         0xA11A
+#define mmPA_CL_VPORT_ZOFFSET_2                         0xA120
+#define mmPA_CL_VPORT_ZOFFSET_3                         0xA126
+#define mmPA_CL_VPORT_ZOFFSET_4                         0xA12C
+#define mmPA_CL_VPORT_ZOFFSET_5                         0xA132
+#define mmPA_CL_VPORT_ZOFFSET_6                         0xA138
+#define mmPA_CL_VPORT_ZOFFSET_7                         0xA13E
+#define mmPA_CL_VPORT_ZOFFSET_8                         0xA144
+#define mmPA_CL_VPORT_ZOFFSET_9                         0xA14A
+#define mmPA_CL_VPORT_ZOFFSET_10                        0xA150
+#define mmPA_CL_VPORT_ZOFFSET_11                        0xA156
+#define mmPA_CL_VPORT_ZOFFSET_12                        0xA15C
+#define mmPA_CL_VPORT_ZOFFSET_13                        0xA162
+#define mmPA_CL_VPORT_ZOFFSET_14                        0xA168
+#define mmPA_CL_VPORT_ZOFFSET_15                        0xA16E
+#define mmPA_CL_VTE_CNTL                                0xA206
+#define mmPA_CL_VS_OUT_CNTL                             0xA207
+#define mmPA_CL_NANINF_CNTL                             0xA208
+#define mmPA_CL_CLIP_CNTL                               0xA204
+#define mmPA_CL_GB_VERT_CLIP_ADJ                        0xA303
+#define mmPA_CL_GB_VERT_DISC_ADJ                        0xA304
+#define mmPA_CL_GB_HORZ_CLIP_ADJ                        0xA305
+#define mmPA_CL_GB_HORZ_DISC_ADJ                        0xA306
+#define mmPA_CL_UCP_0_X                                 0xA388
+#define mmPA_CL_UCP_0_Y                                 0xA389
+#define mmPA_CL_UCP_0_Z                                 0xA38A
+#define mmPA_CL_UCP_0_W                                 0xA38B
+#define mmPA_CL_UCP_1_X                                 0xA38C
+#define mmPA_CL_UCP_1_Y                                 0xA38D
+#define mmPA_CL_UCP_1_Z                                 0xA38E
+#define mmPA_CL_UCP_1_W                                 0xA38F
+#define mmPA_CL_UCP_2_X                                 0xA390
+#define mmPA_CL_UCP_2_Y                                 0xA391
+#define mmPA_CL_UCP_2_Z                                 0xA392
+#define mmPA_CL_UCP_2_W                                 0xA393
+#define mmPA_CL_UCP_3_X                                 0xA394
+#define mmPA_CL_UCP_3_Y                                 0xA395
+#define mmPA_CL_UCP_3_Z                                 0xA396
+#define mmPA_CL_UCP_3_W                                 0xA397
+#define mmPA_CL_UCP_4_X                                 0xA398
+#define mmPA_CL_UCP_4_Y                                 0xA399
+#define mmPA_CL_UCP_4_Z                                 0xA39A
+#define mmPA_CL_UCP_4_W                                 0xA39B
+#define mmPA_CL_UCP_5_X                                 0xA39C
+#define mmPA_CL_UCP_5_Y                                 0xA39D
+#define mmPA_CL_UCP_5_Z                                 0xA39E
+#define mmPA_CL_UCP_5_W                                 0xA39F
+#define mmPA_CL_POINT_X_RAD                             0xA384
+#define mmPA_CL_POINT_Y_RAD                             0xA385
+#define mmPA_CL_POINT_SIZE                              0xA386
+#define mmPA_CL_POINT_CULL_RAD                          0xA387
+/* PA_SU_*: VTX_CNTL, POINT_SIZE/MINMAX, LINE_CNTL, SC_MODE_CNTL, then six POLY_OFFSET_* registers at consecutive offsets 0xA37E-0xA383. */
+#define mmPA_SU_VTX_CNTL                                0xA302
+#define mmPA_SU_POINT_SIZE                              0xA280
+#define mmPA_SU_POINT_MINMAX                            0xA281
+#define mmPA_SU_LINE_CNTL                               0xA282
+#define mmPA_SU_SC_MODE_CNTL                            0xA205
+#define mmPA_SU_POLY_OFFSET_DB_FMT_CNTL                 0xA37E
+#define mmPA_SU_POLY_OFFSET_CLAMP                       0xA37F
+#define mmPA_SU_POLY_OFFSET_FRONT_SCALE                 0xA380
+#define mmPA_SU_POLY_OFFSET_FRONT_OFFSET                0xA381
+#define mmPA_SU_POLY_OFFSET_BACK_SCALE                  0xA382
+#define mmPA_SU_POLY_OFFSET_BACK_OFFSET                 0xA383
+/* PA_SC_*: window/AA/line/cliprect registers, then per-viewport pairs (n = 0-15): VPORT_SCISSOR_n_TL/BR at 0xA094+2n / 0xA095+2n, VPORT_ZMIN_n/ZMAX_n at 0xA0B4+2n / 0xA0B5+2n. */
+#define mmPA_SC_WINDOW_OFFSET                           0xA080
+#define mmPA_SC_AA_CONFIG                               0xA301
+#define mmPA_SC_AA_MASK                                 0xA312
+#define mmPA_SC_AA_SAMPLE_LOCS_MCTX                     0xA307
+#define mmPA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX              0xA308
+#define mmPA_SC_LINE_STIPPLE                            0xA283
+#define mmPA_SC_LINE_CNTL                               0xA300
+#define mmPA_SC_SCREEN_SCISSOR_TL                       0xA00C
+#define mmPA_SC_SCREEN_SCISSOR_BR                       0xA00D
+#define mmPA_SC_WINDOW_SCISSOR_TL                       0xA081
+#define mmPA_SC_WINDOW_SCISSOR_BR                       0xA082
+#define mmPA_SC_CLIPRECT_RULE                           0xA083
+#define mmPA_SC_CLIPRECT_0_TL                           0xA084
+#define mmPA_SC_CLIPRECT_0_BR                           0xA085
+#define mmPA_SC_CLIPRECT_1_TL                           0xA086
+#define mmPA_SC_CLIPRECT_1_BR                           0xA087
+#define mmPA_SC_CLIPRECT_2_TL                           0xA088
+#define mmPA_SC_CLIPRECT_2_BR                           0xA089
+#define mmPA_SC_CLIPRECT_3_TL                           0xA08A
+#define mmPA_SC_CLIPRECT_3_BR                           0xA08B
+#define mmPA_SC_EDGERULE                                0xA08C
+#define mmPA_SC_GENERIC_SCISSOR_TL                      0xA090
+#define mmPA_SC_GENERIC_SCISSOR_BR                      0xA091
+#define mmPA_SC_VPORT_SCISSOR_0_TL                      0xA094
+#define mmPA_SC_VPORT_SCISSOR_1_TL                      0xA096
+#define mmPA_SC_VPORT_SCISSOR_2_TL                      0xA098
+#define mmPA_SC_VPORT_SCISSOR_3_TL                      0xA09A
+#define mmPA_SC_VPORT_SCISSOR_4_TL                      0xA09C
+#define mmPA_SC_VPORT_SCISSOR_5_TL                      0xA09E
+#define mmPA_SC_VPORT_SCISSOR_6_TL                      0xA0A0
+#define mmPA_SC_VPORT_SCISSOR_7_TL                      0xA0A2
+#define mmPA_SC_VPORT_SCISSOR_8_TL                      0xA0A4
+#define mmPA_SC_VPORT_SCISSOR_9_TL                      0xA0A6
+#define mmPA_SC_VPORT_SCISSOR_10_TL                     0xA0A8
+#define mmPA_SC_VPORT_SCISSOR_11_TL                     0xA0AA
+#define mmPA_SC_VPORT_SCISSOR_12_TL                     0xA0AC
+#define mmPA_SC_VPORT_SCISSOR_13_TL                     0xA0AE
+#define mmPA_SC_VPORT_SCISSOR_14_TL                     0xA0B0
+#define mmPA_SC_VPORT_SCISSOR_15_TL                     0xA0B2
+#define mmPA_SC_VPORT_SCISSOR_0_BR                      0xA095
+#define mmPA_SC_VPORT_SCISSOR_1_BR                      0xA097
+#define mmPA_SC_VPORT_SCISSOR_2_BR                      0xA099
+#define mmPA_SC_VPORT_SCISSOR_3_BR                      0xA09B
+#define mmPA_SC_VPORT_SCISSOR_4_BR                      0xA09D
+#define mmPA_SC_VPORT_SCISSOR_5_BR                      0xA09F
+#define mmPA_SC_VPORT_SCISSOR_6_BR                      0xA0A1
+#define mmPA_SC_VPORT_SCISSOR_7_BR                      0xA0A3
+#define mmPA_SC_VPORT_SCISSOR_8_BR                      0xA0A5
+#define mmPA_SC_VPORT_SCISSOR_9_BR                      0xA0A7
+#define mmPA_SC_VPORT_SCISSOR_10_BR                     0xA0A9
+#define mmPA_SC_VPORT_SCISSOR_11_BR                     0xA0AB
+#define mmPA_SC_VPORT_SCISSOR_12_BR                     0xA0AD
+#define mmPA_SC_VPORT_SCISSOR_13_BR                     0xA0AF
+#define mmPA_SC_VPORT_SCISSOR_14_BR                     0xA0B1
+#define mmPA_SC_VPORT_SCISSOR_15_BR                     0xA0B3
+#define mmPA_SC_VPORT_ZMIN_0                            0xA0B4
+#define mmPA_SC_VPORT_ZMIN_1                            0xA0B6
+#define mmPA_SC_VPORT_ZMIN_2                            0xA0B8
+#define mmPA_SC_VPORT_ZMIN_3                            0xA0BA
+#define mmPA_SC_VPORT_ZMIN_4                            0xA0BC
+#define mmPA_SC_VPORT_ZMIN_5                            0xA0BE
+#define mmPA_SC_VPORT_ZMIN_6                            0xA0C0
+#define mmPA_SC_VPORT_ZMIN_7                            0xA0C2
+#define mmPA_SC_VPORT_ZMIN_8                            0xA0C4
+#define mmPA_SC_VPORT_ZMIN_9                            0xA0C6
+#define mmPA_SC_VPORT_ZMIN_10                           0xA0C8
+#define mmPA_SC_VPORT_ZMIN_11                           0xA0CA
+#define mmPA_SC_VPORT_ZMIN_12                           0xA0CC
+#define mmPA_SC_VPORT_ZMIN_13                           0xA0CE
+#define mmPA_SC_VPORT_ZMIN_14                           0xA0D0
+#define mmPA_SC_VPORT_ZMIN_15                           0xA0D2
+#define mmPA_SC_VPORT_ZMAX_0                            0xA0B5
+#define mmPA_SC_VPORT_ZMAX_1                            0xA0B7
+#define mmPA_SC_VPORT_ZMAX_2                            0xA0B9
+#define mmPA_SC_VPORT_ZMAX_3                            0xA0BB
+#define mmPA_SC_VPORT_ZMAX_4                            0xA0BD
+#define mmPA_SC_VPORT_ZMAX_5                            0xA0BF
+#define mmPA_SC_VPORT_ZMAX_6                            0xA0C1
+#define mmPA_SC_VPORT_ZMAX_7                            0xA0C3
+#define mmPA_SC_VPORT_ZMAX_8                            0xA0C5
+#define mmPA_SC_VPORT_ZMAX_9                            0xA0C7
+#define mmPA_SC_VPORT_ZMAX_10                           0xA0C9
+#define mmPA_SC_VPORT_ZMAX_11                           0xA0CB
+#define mmPA_SC_VPORT_ZMAX_12                           0xA0CD
+#define mmPA_SC_VPORT_ZMAX_13                           0xA0CF
+#define mmPA_SC_VPORT_ZMAX_14                           0xA0D1
+#define mmPA_SC_VPORT_ZMAX_15                           0xA0D3
+#define mmPA_SC_MODE_CNTL                               0xA293
+#define mmPA_SC_MPASS_PS_CNTL                           0xA292
+/* VGT_DRAW_INITIATOR, VGT_EVENT_* and the VGT_DMA_* registers. */
+#define mmVGT_DRAW_INITIATOR                            0xA1FC
+#define mmVGT_EVENT_INITIATOR                           0xA2A4
+#define mmVGT_EVENT_ADDRESS_REG                         0xA1FE
+#define mmVGT_DMA_BASE_HI                               0xA1F9
+#define mmVGT_DMA_BASE                                  0xA1FA
+#define mmVGT_DMA_INDEX_TYPE                            0xA29F
+#define mmVGT_DMA_NUM_INSTANCES                         0xA2A2
+#define mmVGT_DMA_SIZE                                  0xA29D
+/* Remaining VGT_* registers. mmVGT_INDEX_TYPE, NUM_INDICES, NUM_INSTANCES and PRIMITIVE_TYPE are in the 0x22xx range, unlike the 0xAxxx entries around them. */
+#define mmVGT_IMMED_DATA                                0xA1FD
+#define mmVGT_INDEX_TYPE                                0x2257
+#define mmVGT_NUM_INDICES                               0x225C
+#define mmVGT_NUM_INSTANCES                             0x225D
+#define mmVGT_PRIMITIVE_TYPE                            0x2256
+#define mmVGT_PRIMITIVEID_EN                            0xA2A1
+#define mmVGT_VTX_CNT_EN                                0xA2AE
+#define mmVGT_REUSE_OFF                                 0xA2AD
+#define mmVGT_INSTANCE_STEP_RATE_0                      0xA2A8
+#define mmVGT_INSTANCE_STEP_RATE_1                      0xA2A9
+#define mmVGT_MAX_VTX_INDX                              0xA100
+#define mmVGT_MIN_VTX_INDX                              0xA101
+#define mmVGT_INDX_OFFSET                               0xA102
+#define mmVGT_VERTEX_REUSE_BLOCK_CNTL                   0xA316
+#define mmVGT_OUT_DEALLOC_CNTL                          0xA317
+#define mmVGT_MULTI_PRIM_IB_RESET_INDX                  0xA103
+#define mmVGT_MULTI_PRIM_IB_RESET_EN                    0xA2A5
+#define mmVGT_ENHANCE                                   0xA294
+#define mmVGT_OUTPUT_PATH_CNTL                          0xA284
+#define mmVGT_HOS_CNTL                                  0xA285
+#define mmVGT_HOS_MAX_TESS_LEVEL                        0xA286
+#define mmVGT_HOS_MIN_TESS_LEVEL                        0xA287
+#define mmVGT_HOS_REUSE_DEPTH                           0xA288
+#define mmVGT_GROUP_PRIM_TYPE                           0xA289
+#define mmVGT_GROUP_FIRST_DECR                          0xA28A
+#define mmVGT_GROUP_DECR                                0xA28B
+#define mmVGT_GROUP_VECT_0_CNTL                         0xA28C
+#define mmVGT_GROUP_VECT_1_CNTL                         0xA28D
+#define mmVGT_GROUP_VECT_0_FMT_CNTL                     0xA28E
+#define mmVGT_GROUP_VECT_1_FMT_CNTL                     0xA28F
+#define mmVGT_GS_MODE                                   0xA290
+#define mmVGT_GS_OUT_PRIM_TYPE                          0xA29B
+/* VGT_STRMOUT_*: buffers 0-3 each take four offsets from 0xA2B4 in the order BUFFER_SIZE, VTX_STRIDE, BUFFER_BASE, BUFFER_OFFSET; then BUFFER_EN, BASE_OFFSET(_HI) and DRAW_OPAQUE_*. */
+#define mmVGT_STRMOUT_EN                                0xA2AC
+#define mmVGT_STRMOUT_BUFFER_SIZE_0                     0xA2B4
+#define mmVGT_STRMOUT_BUFFER_SIZE_1                     0xA2B8
+#define mmVGT_STRMOUT_BUFFER_SIZE_2                     0xA2BC
+#define mmVGT_STRMOUT_BUFFER_SIZE_3                     0xA2C0
+#define mmVGT_STRMOUT_BUFFER_OFFSET_0                   0xA2B7
+#define mmVGT_STRMOUT_BUFFER_OFFSET_1                   0xA2BB
+#define mmVGT_STRMOUT_BUFFER_OFFSET_2                   0xA2BF
+#define mmVGT_STRMOUT_BUFFER_OFFSET_3                   0xA2C3
+#define mmVGT_STRMOUT_VTX_STRIDE_0                      0xA2B5
+#define mmVGT_STRMOUT_VTX_STRIDE_1                      0xA2B9
+#define mmVGT_STRMOUT_VTX_STRIDE_2                      0xA2BD
+#define mmVGT_STRMOUT_VTX_STRIDE_3                      0xA2C1
+#define mmVGT_STRMOUT_BUFFER_BASE_0                     0xA2B6
+#define mmVGT_STRMOUT_BUFFER_BASE_1                     0xA2BA
+#define mmVGT_STRMOUT_BUFFER_BASE_2                     0xA2BE
+#define mmVGT_STRMOUT_BUFFER_BASE_3                     0xA2C2
+#define mmVGT_STRMOUT_BUFFER_EN                         0xA2C8
+#define mmVGT_STRMOUT_BASE_OFFSET_0                     0xA2C4
+#define mmVGT_STRMOUT_BASE_OFFSET_1                     0xA2C5
+#define mmVGT_STRMOUT_BASE_OFFSET_2                     0xA2C6
+#define mmVGT_STRMOUT_BASE_OFFSET_3                     0xA2C7
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_0                  0xA2D1
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_1                  0xA2D2
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_2                  0xA2D3
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_3                  0xA2D4
+#define mmVGT_STRMOUT_DRAW_OPAQUE_OFFSET                0xA2CA
+#define mmVGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE    0xA2CB
+#define mmVGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE         0xA2CC
+/* SQ_PGM_{START,CF_OFFSET,RESOURCES} for the PS, VS, GS, ES and FS suffixes (plus EXPORTS_PS), the eight SQ_*_RING_ITEMSIZE registers at 0xA22A-0xA231, SQ_GS_VERT_ITEMSIZE and SQ_VTX_SEMANTIC_CLEAR. */
+#define mmSQ_PGM_START_PS                               0xA210
+#define mmSQ_PGM_CF_OFFSET_PS                           0xA233
+#define mmSQ_PGM_RESOURCES_PS                           0xA214
+#define mmSQ_PGM_EXPORTS_PS                             0xA215
+#define mmSQ_PGM_START_VS                               0xA216
+#define mmSQ_PGM_CF_OFFSET_VS                           0xA234
+#define mmSQ_PGM_RESOURCES_VS                           0xA21A
+#define mmSQ_PGM_START_GS                               0xA21B
+#define mmSQ_PGM_CF_OFFSET_GS                           0xA235
+#define mmSQ_PGM_RESOURCES_GS                           0xA21F
+#define mmSQ_PGM_START_ES                               0xA220
+#define mmSQ_PGM_CF_OFFSET_ES                           0xA236
+#define mmSQ_PGM_RESOURCES_ES                           0xA224
+#define mmSQ_PGM_START_FS                               0xA225
+#define mmSQ_PGM_CF_OFFSET_FS                           0xA237
+#define mmSQ_PGM_RESOURCES_FS                           0xA229
+#define mmSQ_ESGS_RING_ITEMSIZE                         0xA22A
+#define mmSQ_GSVS_RING_ITEMSIZE                         0xA22B
+#define mmSQ_ESTMP_RING_ITEMSIZE                        0xA22C
+#define mmSQ_GSTMP_RING_ITEMSIZE                        0xA22D
+#define mmSQ_VSTMP_RING_ITEMSIZE                        0xA22E
+#define mmSQ_PSTMP_RING_ITEMSIZE                        0xA22F
+#define mmSQ_FBUF_RING_ITEMSIZE                         0xA230
+#define mmSQ_REDUC_RING_ITEMSIZE                        0xA231
+#define mmSQ_GS_VERT_ITEMSIZE                           0xA232
+#define mmSQ_VTX_SEMANTIC_CLEAR                         0xA238
+/* SQ_VTX_SEMANTIC_0-31: 32 consecutive offsets, 0xA0E0 + n. */
+#define mmSQ_VTX_SEMANTIC_0                             0xA0E0
+#define mmSQ_VTX_SEMANTIC_1                             0xA0E1
+#define mmSQ_VTX_SEMANTIC_2                             0xA0E2
+#define mmSQ_VTX_SEMANTIC_3                             0xA0E3
+#define mmSQ_VTX_SEMANTIC_4                             0xA0E4
+#define mmSQ_VTX_SEMANTIC_5                             0xA0E5
+#define mmSQ_VTX_SEMANTIC_6                             0xA0E6
+#define mmSQ_VTX_SEMANTIC_7                             0xA0E7
+#define mmSQ_VTX_SEMANTIC_8                             0xA0E8
+#define mmSQ_VTX_SEMANTIC_9                             0xA0E9
+#define mmSQ_VTX_SEMANTIC_10                            0xA0EA
+#define mmSQ_VTX_SEMANTIC_11                            0xA0EB
+#define mmSQ_VTX_SEMANTIC_12                            0xA0EC
+#define mmSQ_VTX_SEMANTIC_13                            0xA0ED
+#define mmSQ_VTX_SEMANTIC_14                            0xA0EE
+#define mmSQ_VTX_SEMANTIC_15                            0xA0EF
+#define mmSQ_VTX_SEMANTIC_16                            0xA0F0
+#define mmSQ_VTX_SEMANTIC_17                            0xA0F1
+#define mmSQ_VTX_SEMANTIC_18                            0xA0F2
+#define mmSQ_VTX_SEMANTIC_19                            0xA0F3
+#define mmSQ_VTX_SEMANTIC_20                            0xA0F4
+#define mmSQ_VTX_SEMANTIC_21                            0xA0F5
+#define mmSQ_VTX_SEMANTIC_22                            0xA0F6
+#define mmSQ_VTX_SEMANTIC_23                            0xA0F7
+#define mmSQ_VTX_SEMANTIC_24                            0xA0F8
+#define mmSQ_VTX_SEMANTIC_25                            0xA0F9
+#define mmSQ_VTX_SEMANTIC_26                            0xA0FA
+#define mmSQ_VTX_SEMANTIC_27                            0xA0FB
+#define mmSQ_VTX_SEMANTIC_28                            0xA0FC
+#define mmSQ_VTX_SEMANTIC_29                            0xA0FD
+#define mmSQ_VTX_SEMANTIC_30                            0xA0FE
+#define mmSQ_VTX_SEMANTIC_31                            0xA0FF
+
+/*
+ * SQ_ALU_CONST_CACHE_<stage>_<n>: sixteen consecutive entries per shader
+ * stage -- PS at 0xA250..0xA25F, VS at 0xA260..0xA26F, GS at 0xA270..0xA27F.
+ * NOTE(review): presumably dword register offsets, one per ALU constant
+ * cache slot -- confirm against the register specification.
+ */
+#define mmSQ_ALU_CONST_CACHE_PS_0                       0xA250
+#define mmSQ_ALU_CONST_CACHE_PS_1                       0xA251
+#define mmSQ_ALU_CONST_CACHE_PS_2                       0xA252
+#define mmSQ_ALU_CONST_CACHE_PS_3                       0xA253
+#define mmSQ_ALU_CONST_CACHE_PS_4                       0xA254
+#define mmSQ_ALU_CONST_CACHE_PS_5                       0xA255
+#define mmSQ_ALU_CONST_CACHE_PS_6                       0xA256
+#define mmSQ_ALU_CONST_CACHE_PS_7                       0xA257
+#define mmSQ_ALU_CONST_CACHE_PS_8                       0xA258
+#define mmSQ_ALU_CONST_CACHE_PS_9                       0xA259
+#define mmSQ_ALU_CONST_CACHE_PS_10                      0xA25A
+#define mmSQ_ALU_CONST_CACHE_PS_11                      0xA25B
+#define mmSQ_ALU_CONST_CACHE_PS_12                      0xA25C
+#define mmSQ_ALU_CONST_CACHE_PS_13                      0xA25D
+#define mmSQ_ALU_CONST_CACHE_PS_14                      0xA25E
+#define mmSQ_ALU_CONST_CACHE_PS_15                      0xA25F
+#define mmSQ_ALU_CONST_CACHE_VS_0                       0xA260
+#define mmSQ_ALU_CONST_CACHE_VS_1                       0xA261
+#define mmSQ_ALU_CONST_CACHE_VS_2                       0xA262
+#define mmSQ_ALU_CONST_CACHE_VS_3                       0xA263
+#define mmSQ_ALU_CONST_CACHE_VS_4                       0xA264
+#define mmSQ_ALU_CONST_CACHE_VS_5                       0xA265
+#define mmSQ_ALU_CONST_CACHE_VS_6                       0xA266
+#define mmSQ_ALU_CONST_CACHE_VS_7                       0xA267
+#define mmSQ_ALU_CONST_CACHE_VS_8                       0xA268
+#define mmSQ_ALU_CONST_CACHE_VS_9                       0xA269
+#define mmSQ_ALU_CONST_CACHE_VS_10                      0xA26A
+#define mmSQ_ALU_CONST_CACHE_VS_11                      0xA26B
+#define mmSQ_ALU_CONST_CACHE_VS_12                      0xA26C
+#define mmSQ_ALU_CONST_CACHE_VS_13                      0xA26D
+#define mmSQ_ALU_CONST_CACHE_VS_14                      0xA26E
+#define mmSQ_ALU_CONST_CACHE_VS_15                      0xA26F
+#define mmSQ_ALU_CONST_CACHE_GS_0                       0xA270
+#define mmSQ_ALU_CONST_CACHE_GS_1                       0xA271
+#define mmSQ_ALU_CONST_CACHE_GS_2                       0xA272
+#define mmSQ_ALU_CONST_CACHE_GS_3                       0xA273
+#define mmSQ_ALU_CONST_CACHE_GS_4                       0xA274
+#define mmSQ_ALU_CONST_CACHE_GS_5                       0xA275
+#define mmSQ_ALU_CONST_CACHE_GS_6                       0xA276
+#define mmSQ_ALU_CONST_CACHE_GS_7                       0xA277
+#define mmSQ_ALU_CONST_CACHE_GS_8                       0xA278
+#define mmSQ_ALU_CONST_CACHE_GS_9                       0xA279
+#define mmSQ_ALU_CONST_CACHE_GS_10                      0xA27A
+#define mmSQ_ALU_CONST_CACHE_GS_11                      0xA27B
+#define mmSQ_ALU_CONST_CACHE_GS_12                      0xA27C
+#define mmSQ_ALU_CONST_CACHE_GS_13                      0xA27D
+#define mmSQ_ALU_CONST_CACHE_GS_14                      0xA27E
+#define mmSQ_ALU_CONST_CACHE_GS_15                      0xA27F
+/*
+ * SQ_ALU_CONST_BUFFER_SIZE_<stage>_<n>: sixteen consecutive entries per
+ * shader stage -- PS at 0xA050..0xA05F, VS at 0xA060..0xA06F, GS at
+ * 0xA070..0xA07F.  The <stage>_<n> suffixes mirror the
+ * SQ_ALU_CONST_CACHE_<stage>_<n> names.
+ * NOTE(review): presumably the size of the buffer behind the matching
+ * cache slot -- confirm against the register specification.
+ */
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_0                 0xA050
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_1                 0xA051
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_2                 0xA052
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_3                 0xA053
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_4                 0xA054
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_5                 0xA055
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_6                 0xA056
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_7                 0xA057
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_8                 0xA058
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_9                 0xA059
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_10                0xA05A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_11                0xA05B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_12                0xA05C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_13                0xA05D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_14                0xA05E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_15                0xA05F
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_0                 0xA060
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_1                 0xA061
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_2                 0xA062
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_3                 0xA063
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_4                 0xA064
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_5                 0xA065
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_6                 0xA066
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_7                 0xA067
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_8                 0xA068
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_9                 0xA069
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_10                0xA06A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_11                0xA06B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_12                0xA06C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_13                0xA06D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_14                0xA06E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_15                0xA06F
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_0                 0xA070
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_1                 0xA071
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_2                 0xA072
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_3                 0xA073
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_4                 0xA074
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_5                 0xA075
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_6                 0xA076
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_7                 0xA077
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_8                 0xA078
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_9                 0xA079
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_10                0xA07A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_11                0xA07B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_12                0xA07C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_13                0xA07D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_14                0xA07E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_15                0xA07F
+
+/*
+ * SPI block: ten SPI_VS_OUT_ID_<n> entries (0xA185..0xA18E), thirty-two
+ * SPI_PS_INPUT_CNTL_<n> entries (0xA191..0xA1B0), then the individual
+ * control entries at 0xA1B1..0xA1B9.  Note the gap at 0xA18F..0xA190:
+ * nothing is defined there in this list.
+ */
+#define mmSPI_VS_OUT_ID_0                               0xA185
+#define mmSPI_VS_OUT_ID_1                               0xA186
+#define mmSPI_VS_OUT_ID_2                               0xA187
+#define mmSPI_VS_OUT_ID_3                               0xA188
+#define mmSPI_VS_OUT_ID_4                               0xA189
+#define mmSPI_VS_OUT_ID_5                               0xA18A
+#define mmSPI_VS_OUT_ID_6                               0xA18B
+#define mmSPI_VS_OUT_ID_7                               0xA18C
+#define mmSPI_VS_OUT_ID_8                               0xA18D
+#define mmSPI_VS_OUT_ID_9                               0xA18E
+#define mmSPI_PS_INPUT_CNTL_0                           0xA191
+#define mmSPI_PS_INPUT_CNTL_1                           0xA192
+#define mmSPI_PS_INPUT_CNTL_2                           0xA193
+#define mmSPI_PS_INPUT_CNTL_3                           0xA194
+#define mmSPI_PS_INPUT_CNTL_4                           0xA195
+#define mmSPI_PS_INPUT_CNTL_5                           0xA196
+#define mmSPI_PS_INPUT_CNTL_6                           0xA197
+#define mmSPI_PS_INPUT_CNTL_7                           0xA198
+#define mmSPI_PS_INPUT_CNTL_8                           0xA199
+#define mmSPI_PS_INPUT_CNTL_9                           0xA19A
+#define mmSPI_PS_INPUT_CNTL_10                          0xA19B
+#define mmSPI_PS_INPUT_CNTL_11                          0xA19C
+#define mmSPI_PS_INPUT_CNTL_12                          0xA19D
+#define mmSPI_PS_INPUT_CNTL_13                          0xA19E
+#define mmSPI_PS_INPUT_CNTL_14                          0xA19F
+#define mmSPI_PS_INPUT_CNTL_15                          0xA1A0
+#define mmSPI_PS_INPUT_CNTL_16                          0xA1A1
+#define mmSPI_PS_INPUT_CNTL_17                          0xA1A2
+#define mmSPI_PS_INPUT_CNTL_18                          0xA1A3
+#define mmSPI_PS_INPUT_CNTL_19                          0xA1A4
+#define mmSPI_PS_INPUT_CNTL_20                          0xA1A5
+#define mmSPI_PS_INPUT_CNTL_21                          0xA1A6
+#define mmSPI_PS_INPUT_CNTL_22                          0xA1A7
+#define mmSPI_PS_INPUT_CNTL_23                          0xA1A8
+#define mmSPI_PS_INPUT_CNTL_24                          0xA1A9
+#define mmSPI_PS_INPUT_CNTL_25                          0xA1AA
+#define mmSPI_PS_INPUT_CNTL_26                          0xA1AB
+#define mmSPI_PS_INPUT_CNTL_27                          0xA1AC
+#define mmSPI_PS_INPUT_CNTL_28                          0xA1AD
+#define mmSPI_PS_INPUT_CNTL_29                          0xA1AE
+#define mmSPI_PS_INPUT_CNTL_30                          0xA1AF
+#define mmSPI_PS_INPUT_CNTL_31                          0xA1B0
+#define mmSPI_VS_OUT_CONFIG                             0xA1B1
+#define mmSPI_THREAD_GROUPING                           0xA1B2
+#define mmSPI_PS_IN_CONTROL_0                           0xA1B3
+#define mmSPI_PS_IN_CONTROL_1                           0xA1B4
+#define mmSPI_INTERP_CONTROL_0                          0xA1B5
+#define mmSPI_INPUT_Z                                   0xA1B6
+#define mmSPI_FOG_CNTL                                  0xA1B7
+#define mmSPI_FOG_FUNC_SCALE                            0xA1B8
+#define mmSPI_FOG_FUNC_BIAS                             0xA1B9
+
+/* SX block: miscellaneous control plus the alpha-test control/reference pair. */
+#define mmSX_MISC                                       0xA0D4
+
+#define mmSX_ALPHA_TEST_CONTROL                         0xA104
+#define mmSX_ALPHA_REF                                  0xA10E
+
+/*
+ * DB (depth/stencil) block.  The entries are not listed in numeric order;
+ * the values span 0xA000..0xA351.
+ */
+#define mmDB_DEPTH_BASE                                 0xA003
+#define mmDB_DEPTH_INFO                                 0xA004
+#define mmDB_HTILE_DATA_BASE                            0xA005
+#define mmDB_DEPTH_SIZE                                 0xA000
+#define mmDB_DEPTH_VIEW                                 0xA001
+#define mmDB_RENDER_CONTROL                             0xA343
+#define mmDB_RENDER_OVERRIDE                            0xA344
+#define mmDB_SHADER_CONTROL                             0xA203
+#define mmDB_STENCIL_CLEAR                              0xA00A
+#define mmDB_DEPTH_CLEAR                                0xA00B
+#define mmDB_HTILE_SURFACE                              0xA349
+#define mmDB_PRELOAD_CONTROL                            0xA34C
+#define mmDB_PREFETCH_LIMIT                             0xA34D
+#define mmDB_STENCILREFMASK                             0xA10C
+#define mmDB_STENCILREFMASK_BF                          0xA10D
+#define mmDB_SRESULTS_COMPARE_STATE0                    0xA34A
+#define mmDB_SRESULTS_COMPARE_STATE1                    0xA34B
+#define mmDB_DEPTH_CONTROL                              0xA200
+#define mmDB_ALPHA_TO_MASK                              0xA351
+
+/*
+ * CB (colour buffer) block.
+ * NOTE(review): names carrying an _R6XX suffix are presumably valid on
+ * R6xx parts only -- confirm before using them on R7xx.
+ */
+/* Blend constant colour, fog colour and global blend/colour control. */
+#define mmCB_BLEND_RED                                  0xA105
+#define mmCB_BLEND_GREEN                                0xA106
+#define mmCB_BLEND_BLUE                                 0xA107
+#define mmCB_BLEND_ALPHA                                0xA108
+#define mmCB_FOG_RED_R6XX                               0xA109
+#define mmCB_FOG_GREEN_R6XX                             0xA10A
+#define mmCB_FOG_BLUE_R6XX                              0xA10B
+#define mmCB_BLEND_CONTROL                              0xA201
+#define mmCB_COLOR_CONTROL                              0xA202
+/* One blend control entry per render target 0..7. */
+#define mmCB_BLEND0_CONTROL                             0xA1E0
+#define mmCB_BLEND1_CONTROL                             0xA1E1
+#define mmCB_BLEND2_CONTROL                             0xA1E2
+#define mmCB_BLEND3_CONTROL                             0xA1E3
+#define mmCB_BLEND4_CONTROL                             0xA1E4
+#define mmCB_BLEND5_CONTROL                             0xA1E5
+#define mmCB_BLEND6_CONTROL                             0xA1E6
+#define mmCB_BLEND7_CONTROL                             0xA1E7
+/* Colour compare control, source, destination and mask. */
+#define mmCB_CLRCMP_CONTROL                             0xA30C
+#define mmCB_CLRCMP_SRC                                 0xA30D
+#define mmCB_CLRCMP_DST                                 0xA30E
+#define mmCB_CLRCMP_MSK                                 0xA30F
+/*
+ * Per-render-target state, eight consecutive entries (targets 0..7) for
+ * each of BASE, SIZE, VIEW, INFO, TILE, FRAG and MASK: 0xA010..0xA047.
+ */
+#define mmCB_COLOR0_BASE                                0xA010
+#define mmCB_COLOR1_BASE                                0xA011
+#define mmCB_COLOR2_BASE                                0xA012
+#define mmCB_COLOR3_BASE                                0xA013
+#define mmCB_COLOR4_BASE                                0xA014
+#define mmCB_COLOR5_BASE                                0xA015
+#define mmCB_COLOR6_BASE                                0xA016
+#define mmCB_COLOR7_BASE                                0xA017
+#define mmCB_COLOR0_SIZE                                0xA018
+#define mmCB_COLOR1_SIZE                                0xA019
+#define mmCB_COLOR2_SIZE                                0xA01A
+#define mmCB_COLOR3_SIZE                                0xA01B
+#define mmCB_COLOR4_SIZE                                0xA01C
+#define mmCB_COLOR5_SIZE                                0xA01D
+#define mmCB_COLOR6_SIZE                                0xA01E
+#define mmCB_COLOR7_SIZE                                0xA01F
+#define mmCB_COLOR0_VIEW                                0xA020
+#define mmCB_COLOR1_VIEW                                0xA021
+#define mmCB_COLOR2_VIEW                                0xA022
+#define mmCB_COLOR3_VIEW                                0xA023
+#define mmCB_COLOR4_VIEW                                0xA024
+#define mmCB_COLOR5_VIEW                                0xA025
+#define mmCB_COLOR6_VIEW                                0xA026
+#define mmCB_COLOR7_VIEW                                0xA027
+#define mmCB_COLOR0_INFO                                0xA028
+#define mmCB_COLOR1_INFO                                0xA029
+#define mmCB_COLOR2_INFO                                0xA02A
+#define mmCB_COLOR3_INFO                                0xA02B
+#define mmCB_COLOR4_INFO                                0xA02C
+#define mmCB_COLOR5_INFO                                0xA02D
+#define mmCB_COLOR6_INFO                                0xA02E
+#define mmCB_COLOR7_INFO                                0xA02F
+#define mmCB_COLOR0_TILE                                0xA030
+#define mmCB_COLOR1_TILE                                0xA031
+#define mmCB_COLOR2_TILE                                0xA032
+#define mmCB_COLOR3_TILE                                0xA033
+#define mmCB_COLOR4_TILE                                0xA034
+#define mmCB_COLOR5_TILE                                0xA035
+#define mmCB_COLOR6_TILE                                0xA036
+#define mmCB_COLOR7_TILE                                0xA037
+#define mmCB_COLOR0_FRAG                                0xA038
+#define mmCB_COLOR1_FRAG                                0xA039
+#define mmCB_COLOR2_FRAG                                0xA03A
+#define mmCB_COLOR3_FRAG                                0xA03B
+#define mmCB_COLOR4_FRAG                                0xA03C
+#define mmCB_COLOR5_FRAG                                0xA03D
+#define mmCB_COLOR6_FRAG                                0xA03E
+#define mmCB_COLOR7_FRAG                                0xA03F
+#define mmCB_COLOR0_MASK                                0xA040
+#define mmCB_COLOR1_MASK                                0xA041
+#define mmCB_COLOR2_MASK                                0xA042
+#define mmCB_COLOR3_MASK                                0xA043
+#define mmCB_COLOR4_MASK                                0xA044
+#define mmCB_COLOR5_MASK                                0xA045
+#define mmCB_COLOR6_MASK                                0xA046
+#define mmCB_COLOR7_MASK                                0xA047
+/* Clear colour and the target/shader output masks. */
+#define mmCB_CLEAR_RED_R6XX                             0xA048
+#define mmCB_CLEAR_GREEN_R6XX                           0xA049
+#define mmCB_CLEAR_BLUE_R6XX                            0xA04A
+#define mmCB_CLEAR_ALPHA_R6XX                           0xA04B
+#define mmCB_TARGET_MASK                                0xA08E
+#define mmCB_SHADER_MASK                                0xA08F
+#define mmCB_SHADER_CONTROL                             0xA1E8
+
+/*
+ * Vertex base / start-instance locations.  r700_clear.c emits these
+ * relative to ASIC_CTL_CONST_BASE_INDEX in IT_SET_CTL_CONST packets.
+ */
+#define mmSQ_VTX_BASE_VTX_LOC                           0xF3FC
+#define mmSQ_VTX_START_INST_LOC                         0xF3FD
+
+#endif /* _R700_CHIPOFFSET_H_ */
+
diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c
new file mode 100644 (file)
index 0000000..5725360
--- /dev/null
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+
+#include "r600_context.h"
+#include "r700_chip.h"
+
+#if 0 /* to be enabled */
+#include "r700_shaderinst.h"
+#include "r700_emit.h"
+#endif /* to be enabled */
+
+/*
+ * State initialisation routine defined outside this file.  In this file it
+ * is only called from the draw-based clear path, which is itself compiled
+ * out for now.
+ */
+extern void r700InitState (GLcontext * ctx);
+
+/*
+ * Further helpers used by the draw-based clear path.  They stay compiled out
+ * together with that path until it is enabled.
+ */
+#if 0 /* to be enabled */
+extern void r700SetupVTXConstans(GLcontext  * ctx, 
+                          unsigned int nStreamID,
+                          unsigned int aos_offset,
+                          unsigned int size,      /* number of elements in vector */
+                          unsigned int stride,
+                          unsigned int count);
+extern GLboolean r700SendContextStates(context_t *context);
+extern GLboolean r700SyncSurf(context_t *context);
+#endif /* to be enabled */
+
+/*
+ * Fast-clear path.
+ *
+ * Not implemented yet: both arguments are ignored and GL_FALSE is always
+ * returned, which makes r700Clear() fall back to the draw-based clear.
+ */
+static GLboolean r700ClearFast(context_t *context, GLbitfield mask)
+{
+    /* TODO: implement fast clear; until then the parameters are unused. */
+    (void)context;
+    (void)mask;
+
+    return GL_FALSE;
+}
+
+/*
+ * Clear the buffers selected by mask by drawing two triangles (six vertices
+ * with x,y in [-1, 1]) using small hard-coded vertex and pixel shaders.
+ *
+ * The whole implementation is currently compiled out ("#if 0 ... to be
+ * enabled").  As built today the function only looks up the GL context and
+ * returns GL_TRUE; nothing is sent to the hardware.
+ *
+ * context: driver context.
+ * mask:    BUFFER_BIT_* bits; the disabled code looks at FRONT_LEFT,
+ *          BACK_LEFT and DEPTH only.
+ * Returns GL_TRUE unconditionally.
+ */
+static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask)
+{
+    GLcontext *ctx = GL_CONTEXT(context);
+
+#if 0 /* to be enabled */
+    R700_CHIP_CONTEXT  r700Saved;
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+    struct r600_dma_region  aos_vs;
+    struct r600_dma_region  aos_fs;
+    struct r600_dma_region  aos_vb;
+    aos_vs.buf = NULL;
+    aos_fs.buf = NULL;
+    aos_vb.buf = NULL;
+
+
+    unsigned int ui;
+    GLfloat  fTemp;
+    /* Six vertices of four floats each (x, y, z, w): two triangles. */
+    GLfloat  fVb[] = { 1.0f,  1.0f, 1.0f, 1.0f,
+                      -1.0f, -1.0f, 1.0f, 1.0f,
+                       1.0f, -1.0f, 1.0f, 1.0f,
+                       1.0f,  1.0f, 1.0f, 1.0f,
+                      -1.0f,  1.0f, 1.0f, 1.0f,
+                      -1.0f, -1.0f, 1.0f, 1.0f}; /* TODO : Z set here */
+    /* Pre-assembled vertex shader, 28 dwords ("copied from dump", see below). */
+    unsigned int uVs[] = { 0xC,        0x81000000, 0x4,        0xA01C0000, 
+                           0xC001203C, 0x94000688, 0xC001C000, 0x94200688,
+                           0x10000001, 0x540C90,   0x10000401, 0x20540C90,
+                           0x10000801, 0x40540C90, 0x90000C01, 0x60400C90,
+                           0x10000100, 0x600C90,   0x10000500, 0x20600C90,
+                           0x10000900, 0x40600C90, 0x90000D00, 0x60680C90,
+                           0x7C000000, 0x2D1001,   0x80000,    0xBEADEAF };
+    /* Pre-assembled pixel shader, 12 dwords. */
+    unsigned int uFs[] = { 0x2,        0xA00C0000, 0xC0008000, 0x94200688,
+                           0x10000000, 0x340C90,   0x10000400, 0x20340C90,
+                           0x10000800, 0x40340C90, 0x90000C00, 0x60200C90};
+
+    /*
+     * Chips up to and including RV670 get different values in the odd-indexed
+     * shader words listed below.
+     * NOTE(review): presumably an instruction-encoding difference between
+     * R6xx and R7xx -- confirm against the ISA documentation.
+     */
+    if (context->screen->chip.type <= CHIP_TYPE_RV670)
+    {
+        uVs[9]  = 0x541910;
+        uVs[11] = 0x20541910;
+        uVs[13] = 0x40541910;
+        uVs[15] = 0x60401910;
+        uVs[17] = 0x601910;
+        uVs[19] = 0x20601910;
+        uVs[21] = 0x40601910;
+        uVs[23] = 0x60681910;
+        uFs[5]  = 0x341910;
+        uFs[7]  = 0x20341910;
+        uFs[9]  = 0x40341910;
+        uFs[11] = 0x60201910;
+    }
+
+    r700SyncSurf(context);
+
+    /* Save current chip object. */
+    memcpy(&r700Saved, r700, sizeof(R700_CHIP_CONTEXT));
+
+    r700InitState(ctx);
+
+    r700->CB_COLOR0_BASE.u32All = context->target.rt.gpu >> 8;
+
+    /* Turn off perspective divide. */
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+    if( (mask & BUFFER_BIT_FRONT_LEFT) || (mask & BUFFER_BIT_BACK_LEFT) )
+    {   /* Enable render target output. */
+        SETfield(r700->CB_TARGET_MASK.u32All, 0xF, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
+    }
+    else
+    {   /* Disable render target output. */
+        CLEARfield(r700->CB_TARGET_MASK.u32All, TARGET0_ENABLE_mask); /* TODO : OGL need 4 rt. */
+    }
+    if (mask & BUFFER_BIT_DEPTH)
+    {   
+        /* Set correct Z to clear. */
+        SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+        SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+        SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS, ZFUNC_shift, ZFUNC_mask);
+        fTemp = ctx->Depth.Clear;
+        /* Index 2 of every 4-float vertex is z: overwrite it in all six vertices. */
+        for(ui=2; ui<24;)
+        {
+            fVb[ui] = fTemp;
+            ui += 4;
+        }
+    }
+    else
+    {   
+        /* Disable Z write. */
+        CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+        CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+    }
+
+    /* Setup vb */
+    /* Two packets of three dwords each (header, offset, value) = 6. */
+    R700_CMDBUF_CHECK_SPACE(6);
+    R700EP3 (context, IT_SET_CTL_CONST, 1);
+    R700E32 (context, mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R700E32 (context, 0);
+    R700EP3 (context, IT_SET_CTL_CONST, 1);
+    R700E32 (context, mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R700E32 (context, 0);
+    (context->chipobj.EmitVec)(ctx, &aos_vb, (GLvoid *)fVb, 4, 16, 6);
+    r700SetupVTXConstans(ctx, VERT_ATTRIB_POS, (unsigned int)aos_vb.aos_offset, 4, 16, 6);
+
+    /* Setup shaders, copied from dump */
+    r700->SQ_PGM_RESOURCES_PS.u32All = 0;
+       r700->SQ_PGM_RESOURCES_VS.u32All = 0;
+       SETbit(r700->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+    SETbit(r700->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+        /* vs */
+    (context->chipobj.EmitShader)(ctx, &aos_vs, (GLvoid *)(&uVs[0]), 28);
+    r700->SQ_PGM_START_VS.u32All     = (aos_vs.aos_offset >> 8) & 0x00FFFFFF;
+    /* NOTE(review): this assignment overwrites the PRIME_CACHE_ON_DRAW bit set above unless 0x00800004 already contains it. */
+    r700->SQ_PGM_RESOURCES_VS.u32All = 0x00800004;
+            /* vs const */ /* TODO : Set color here */
+    /* Header + offset + four colour dwords = 6. */
+    R700_CMDBUF_CHECK_SPACE(4 + 2);
+    R700EP3 (context, IT_SET_ALU_CONST, 4);
+    R700E32 (context, SQ_ALU_CONSTANT_VS_OFFSET * 4); 
+    /* The clear colour floats are emitted as raw 32-bit patterns via a pointer cast. */
+    R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[0])));
+    R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[1])));
+    R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[2])));
+    R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[3])));
+
+    r700->SPI_VS_OUT_CONFIG.u32All   = 0x00000000;
+       r700->SPI_PS_IN_CONTROL_0.u32All = 0x20000001;
+        /* ps */
+    (context->chipobj.EmitShader)(ctx, &aos_fs, (GLvoid *)(&uFs[0]), 12); 
+    r700->SQ_PGM_START_PS.u32All     = (aos_fs.aos_offset >> 8) & 0x00FFFFFF;
+    r700->SQ_PGM_RESOURCES_PS.u32All = 0x00800002;
+    r700->SQ_PGM_EXPORTS_PS.u32All   = 0x00000002;        
+    r700->DB_SHADER_CONTROL.u32All   = 0x00000200; 
+
+    r700->CB_SHADER_CONTROL.u32All = 0x00000001;
+
+    /* set a valid base address to make the command checker happy */
+    r700->SQ_PGM_START_FS.u32All     = (aos_fs.aos_offset >> 8) & 0x00FFFFFF;
+    r700->SQ_PGM_START_ES.u32All     = (aos_fs.aos_offset >> 8) & 0x00FFFFFF;
+    r700->SQ_PGM_START_GS.u32All     = (aos_fs.aos_offset >> 8) & 0x00FFFFFF;
+
+    /* Now, send the states */
+    r700SendContextStates(context);
+
+    /* Draw */
+    GLuint numEntires, j;
+    GLuint numIndices = 6;
+    unsigned int VGT_DRAW_INITIATOR = 0;
+    unsigned int VGT_INDEX_TYPE     = 0;
+    unsigned int VGT_PRIMITIVE_TYPE = 0;
+    unsigned int VGT_NUM_INDICES    = 0;
+    
+    /* Total dwords emitted by the three packets below. */
+    numEntires = 2                 /* VGT_INDEX_TYPE */
+                 + 3               /* VGT_PRIMITIVE_TYPE */
+                 + numIndices + 3; /* DRAW_INDEX_IMMD */                 
+                 
+    R700_CMDBUF_CHECK_SPACE(numEntires);  
+
+    SETfield(VGT_INDEX_TYPE, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+
+    R700EP3(context, IT_INDEX_TYPE, 0);
+    R700E32(context, VGT_INDEX_TYPE);
+
+    VGT_NUM_INDICES = numIndices;
+
+    SETfield(VGT_PRIMITIVE_TYPE, DI_PT_TRILIST, VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+    R700EP3(context, IT_SET_CONFIG_REG, 1);
+    R700E32(context, mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
+    R700E32(context, VGT_PRIMITIVE_TYPE);
+
+    SETfield(VGT_DRAW_INITIATOR, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    SETfield(VGT_DRAW_INITIATOR, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+    R700EP3(context, IT_DRAW_INDEX_IMMD, (numIndices + 1));
+    R700E32(context, VGT_NUM_INDICES);
+    R700E32(context, VGT_DRAW_INITIATOR);
+
+    /* Immediate index data: indices 0..numIndices-1 in order. */
+    for (j=0; j<numIndices; j++)
+    {
+        R700E32(context, j);
+    }
+
+    (context->chipobj.FlushCmdBuffer)(context);
+
+    /* TODO : keep these in context, don't load and release every time. */
+    (context->chipobj.FreeDmaRegion)(context, &aos_vs);
+    (context->chipobj.FreeDmaRegion)(context, &aos_fs);
+    (context->chipobj.FreeDmaRegion)(context, &aos_vb);
+
+    /* Restore chip object. */
+    memcpy(r700, &r700Saved, sizeof(R700_CHIP_CONTEXT));
+
+#endif /* to be enabled */
+
+    /* NOTE(review): success is reported even while the body above is compiled out. */
+    return GL_TRUE;
+}
+
+/*
+ * Clear entry point.
+ *
+ * Tries r700ClearFast() first; unless that reports GL_TRUE the request is
+ * handed to r700ClearWithDraw().  The result of the draw-based path is not
+ * inspected.
+ */
+void r700Clear(GLcontext * ctx, GLbitfield mask)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    const GLboolean fastDone = r700ClearFast(context, mask);
+
+    if (fastDone != GL_TRUE)
+    {
+        r700ClearWithDraw(context, mask);
+    }
+}
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_clear.h b/src/mesa/drivers/dri/r600/r700_clear.h
new file mode 100644 (file)
index 0000000..bed1d3a
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef __r700_CLEAR_H__
+#define __r700_CLEAR_H__
+
+/*
+ * Clear entry point implemented in r700_clear.c.
+ * NOTE(review): this header includes nothing itself, so GLcontext and
+ * GLbitfield must already be declared by whoever includes it.
+ */
+extern void r700Clear(GLcontext * ctx, GLbitfield mask);
+
+#endif /* __r700_CLEAR_H__ */
diff --git a/src/mesa/drivers/dri/r600/r700_debug.c b/src/mesa/drivers/dri/r600/r700_debug.c
new file mode 100644 (file)
index 0000000..9e6ad4a
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "main/glheader.h"
+
+#include "r700_debug.h"
+#include "r600_context.h"
+
+/*
+ * Hook for normalising/logging a driver error code.
+ *
+ * Not implemented yet: nError is ignored and nothing is logged.
+ */
+void NormalizeLogErrorCode(int nError)
+{
+    /* TODO: implement; the code is deliberately discarded for now. */
+    (void)nError;
+}
+
+/*
+ * Report a driver error on stderr.
+ *
+ * nLocalError: error code, forwarded to NormalizeLogErrorCode().
+ * fmt:         printf-style format string describing the error.
+ * ...:         arguments consumed by the conversions in fmt.
+ */
+void r700_error(int nLocalError, char* fmt, ...)
+{
+    va_list args;
+
+    NormalizeLogErrorCode(nLocalError);
+
+    /*
+     * A va_list has to go through the v* variant.  Handing it to fprintf()
+     * makes the conversions in fmt read the va_list object itself instead
+     * of the caller's arguments, which is undefined behaviour.
+     */
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+}
+
+/*
+ * Dump a block of 32-bit words through the debug macros.
+ *
+ * type: one of DUMP_PIXEL_SHADER, DUMP_VERTEX_SHADER or DUMP_FETCH_SHADER
+ *       selects the heading; any other value prints no heading.
+ * addr: start of the data, read as unsigned int words.
+ * size: number of words to print (not bytes).
+ *
+ * Words are printed as "0x%08x,\t", four per line.
+ */
+void DumpHwBinary(int type, void *addr, int size)
+{
+    unsigned int *words = (unsigned int *)addr;
+    int idx;
+
+    if (type == DUMP_PIXEL_SHADER)
+    {
+        DEBUGF("Pixel Shader\n");
+    }
+    else if (type == DUMP_VERTEX_SHADER)
+    {
+        DEBUGF("Vertex Shader\n");
+    }
+    else if (type == DUMP_FETCH_SHADER)
+    {
+        DEBUGF("Fetch Shader\n");
+    }
+
+    for (idx = 0; idx < size; idx++)
+    {
+        DEBUGP("0x%08x,\t", words[idx]);
+
+        /* Break the line after every fourth word. */
+        if (idx % 4 == 3)
+        {
+            DEBUGP("\n", words[idx]);
+        }
+    }
+}
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_debug.h b/src/mesa/drivers/dri/r600/r700_debug.h
new file mode 100644 (file)
index 0000000..e810e6d
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_DEBUG_H_
+#define _R700_DEBUG_H_
+
+enum R700_ERROR /* Codes presumably passed as nLocalError to r700_error() -- confirm against callers. */
+{
+    ERROR_ASM_VTX_CLAUSE     = 0x1000, /* ERROR_ASM_* values run consecutively from 0x1000 to 0x1017. */
+    ERROR_ASM_UNKOWNCLAUSE   = 0x1001, /* sic ("UNKOWN"): the spelling is part of the identifier. */
+    ERROR_ASM_ALLOCEXPORTCF  = 0x1002,
+    ERROR_ASM_ALLOCVTXCF     = 0x1003,
+    ERROR_ASM_ALLOCTEXCF     = 0x1004,
+    ERROR_ASM_ALLOCALUCF     = 0x1005,
+    ERROR_ASM_UNKNOWNILINST  = 0x1006,
+    ERROR_ASM_SRCARGUMENT    = 0x1007,
+    ERROR_ASM_DSTARGUMENT    = 0x1008,
+    ERROR_ASM_TEXINSTRUCTION = 0x1009,
+    ERROR_ASM_ALUINSTRUCTION = 0x100A,
+    ERROR_ASM_INSTDSTTRACK   = 0x100B,
+    ERROR_ASM_TEXDSTBADTYPE  = 0x100C,
+    ERROR_ASM_ALUSRCBADTYPE  = 0x100D,
+    ERROR_ASM_ALUSRCSELECT   = 0x100E,
+    ERROR_ASM_ALUSRCNUMBER   = 0x100F,
+    ERROR_ASM_ALUDSTBADTYPE  = 0x1010,
+    ERROR_ASM_CONSTCHANNEL   = 0x1011,
+    ERROR_ASM_BADSCALARBZ    = 0x1012,
+    ERROR_ASM_BADGPRRESERVE  = 0x1013,
+    ERROR_ASM_BADVECTORBZ    = 0x1014,
+    ERROR_ASM_BADTEXINST     = 0x1015,
+    ERROR_ASM_BADTEXSRC      = 0x1016,
+    ERROR_ASM_BADEXPORTTYPE  = 0x1017,
+
+
+    TODO_ASM_CONSTTEXADDR   = 0x8000, /* TODO_ASM_* values start at 0x8000; by name they look like markers for unimplemented paths -- confirm. */
+    TODO_ASM_NEEDIMPINST    = 0x8001,
+    TODO_ASM_TXB            = 0x8002,
+    TODO_ASM_TXP            = 0x8003
+};
+
+enum R700_DUMP_TYPE /* Selects the heading DumpHwBinary() prints before the hex dump. */
+{
+    DUMP_VERTEX_SHADER      = 0x1, /* heading "Vertex Shader" */
+    DUMP_PIXEL_SHADER       = 0x2, /* heading "Pixel Shader" */
+    DUMP_FETCH_SHADER       = 0x4, /* heading "Fetch Shader" */
+};
+
+#define DEBUGF printf /* debug output goes straight to stdout through printf */
+#define DEBUGP printf /* same expansion as DEBUGF */
+
+void NormalizeLogErrorCode(int nError); /* the definition in r700_debug.c is still an empty TODO stub */
+/*NormalizeLogErrorCode(nLocalError); */
+void r700_error(int nLocalError, char *fmt, ...);      /* printf-style message written to stderr */
+extern void DumpHwBinary(int, void *, int); /* (type, addr, size): type is an R700_DUMP_TYPE, size counts unsigned int words */
+
+#ifdef STANDALONE_COMPILER
+#ifdef __cplusplus
+extern "C"
+{
+#endif //__cplusplus
+
+void LogString(char* szStr); /* declared only when STANDALONE_COMPILER is defined; gets C linkage when included from C++ */
+
+#ifdef __cplusplus
+}
+#endif //__cplusplus
+#endif /*STANDALONE_COMPILER*/
+
+#endif /*_R700_DEBUG_H_*/
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
new file mode 100644 (file)
index 0000000..2b81382
--- /dev/null
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+#include "r700_fragprog.h"
+
+/* to be enabled */
+#if 0
+#include "r700_emit.h"
+#endif
+
+#include "r700_debug.h"
+
+//TODO : Validate FP input with VP output.
+/*
+ * Hand out register numbers for a fragment program.
+ *
+ * Registers are numbered in this order: inputs that the program reads
+ * (COL0, COL1, TEX0..TEX7), then the temporaries, then the colour and
+ * depth exports.  The function also allocates pAsm->pucOutMask (one byte
+ * per export, cleared to 0) and stores the first register after the
+ * exports in pAsm->uFirstHelpReg.
+ */
+void Map_Fragment_Program(r700_AssemblerBase         *pAsm,
+                          struct gl_fragment_program *mesa_fp)
+{
+    unsigned int unMask;
+    unsigned int unTex;
+    GLuint       unExport;
+
+    pAsm->number_used_registers = 0;
+
+    /* Input mapping, driven by the bits of mesa_fp->Base.InputsRead
+     * (FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...).
+     * The order MUST match the one used in Map_Vertex_Output. */
+    unMask = 1 << FRAG_ATTRIB_COL0;
+    if(mesa_fp->Base.InputsRead & unMask)
+    {
+        pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
+    }
+
+    unMask = 1 << FRAG_ATTRIB_COL1;
+    if(mesa_fp->Base.InputsRead & unMask)
+    {
+        pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
+    }
+
+    for(unTex = 0; unTex < 8; unTex++)
+    {
+        unMask = 1 << (FRAG_ATTRIB_TEX0 + unTex);
+        if(mesa_fp->Base.InputsRead & unMask)
+        {
+            pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + unTex] = pAsm->number_used_registers++;
+        }
+    }
+
+    /* Temporaries (GPRs): reserve the larger of the two temporary counts. */
+    pAsm->starting_temp_register_number = pAsm->number_used_registers;
+
+    if(mesa_fp->Base.NumTemporaries > mesa_fp->Base.NumNativeTemporaries)
+    {
+        pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
+    }
+    else
+    {
+        pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
+    }
+
+    /* Output mapping. */
+    pAsm->starting_export_register_number = pAsm->number_used_registers;
+    pAsm->number_of_exports               = 0;
+    pAsm->number_of_colorandz_exports     = 0; /* stencil and mask outputs are not counted */
+
+    unMask = 1 << FRAG_RESULT_COLOR;
+    if(mesa_fp->Base.OutputsWritten & unMask)
+    {
+        pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++;
+        pAsm->number_of_exports++;
+        pAsm->number_of_colorandz_exports++;
+    }
+
+    unMask = 1 << FRAG_RESULT_DEPTH;
+    if(mesa_fp->Base.OutputsWritten & unMask)
+    {
+        /* The depth export register is remembered separately as well. */
+        pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers;
+        pAsm->depth_export_register_number      = pAsm->number_used_registers;
+        pAsm->number_used_registers++;
+        pAsm->number_of_exports++;
+        pAsm->number_of_colorandz_exports++;
+    }
+
+    /* One output-mask byte per export, all starting out as 0. */
+    pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
+    for(unExport = 0; unExport < pAsm->number_of_exports; unExport++)
+    {
+        pAsm->pucOutMask[unExport] = 0x0;
+    }
+
+    pAsm->uFirstHelpReg = pAsm->number_used_registers;
+}
+
+/*
+ * Build the per-instruction dependency table for a fragment program and
+ * store it in fp->r700AsmCode.pInstDeps.
+ *
+ * For every instruction i and source operand j, nSrcDeps[j] is the index
+ * of the most recent instruction that wrote the temporary read by that
+ * operand, or -1 if the operand is not a temporary (or the temporary has
+ * not been written yet).  nDstDep is -1 unless set by the texture pass:
+ * a TEX instruction whose coordinate operand (source 0) depends on an
+ * earlier ALU or TEX instruction gets nDstDep = its own index, and for
+ * an ALU producer the producer's nDstDep is set to the TEX index too.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if a working buffer cannot be
+ * allocated (pInstDeps is left untouched in that case).
+ */
+GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
+                                           struct gl_fragment_program   *mesa_fp)
+{
+    GLuint i, j;
+    GLint * puiTEMPwrites;
+    struct prog_instruction * pILInst;
+    InstDeps         *pInstDeps;
+    struct prog_instruction * texcoord_DepInst;
+    GLint              nDepInstID;
+
+    /* Last-writer table, one entry per temporary. */
+    puiTEMPwrites = (GLint*) MALLOC(sizeof(*puiTEMPwrites)*mesa_fp->Base.NumTemporaries);
+    if(NULL == puiTEMPwrites && mesa_fp->Base.NumTemporaries > 0)
+    {
+        return GL_FALSE;
+    }
+    for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
+    {
+        puiTEMPwrites[i] = -1;
+    }
+
+    pInstDeps = (InstDeps*)MALLOC(sizeof(*pInstDeps)*mesa_fp->Base.NumInstructions);
+    if(NULL == pInstDeps && mesa_fp->Base.NumInstructions > 0)
+    {
+        FREE(puiTEMPwrites);
+        return GL_FALSE;
+    }
+
+    for(i=0; i<mesa_fp->Base.NumInstructions; i++)
+    {
+        pInstDeps[i].nDstDep = -1;
+        pILInst = &(mesa_fp->Base.Instructions[i]);
+
+        /* Destination: remember this instruction as the last writer.
+         * NOTE(review): this happens before the sources are looked up, so
+         * an instruction that reads the temporary it writes records itself
+         * as its own source dependency -- confirm this is intended. */
+        if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
+        {
+            puiTEMPwrites[pILInst->DstReg.Index] = i;
+        }
+
+        /* Sources: look up the last writer of each temporary operand. */
+        for(j=0; j<3; j++)
+        {
+            if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
+            {
+                pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+            }
+            else
+            {
+                pInstDeps[i].nSrcDeps[j] = -1;
+            }
+        }
+    }
+
+    fp->r700AsmCode.pInstDeps = pInstDeps;
+
+    FREE(puiTEMPwrites);
+
+    /* Second pass: dependencies of texture instructions. */
+    for(i=0; i<mesa_fp->Base.NumInstructions; i++)
+    {
+        pILInst = &(mesa_fp->Base.Instructions[i]);
+
+        if(GL_TRUE == IsTex(pILInst->Opcode))
+        {   /* src0 is the tex coord register, src1 is texunit, src2 is textype */
+            nDepInstID = pInstDeps[i].nSrcDeps[0];
+            if(nDepInstID >= 0)
+            {
+                texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+                if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+                {
+                    pInstDeps[nDepInstID].nDstDep = i;
+                    pInstDeps[i].nDstDep = i;
+                }
+                else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
+                {
+                    pInstDeps[i].nDstDep = i;
+                }
+                else
+                {   /* ... other deps? */
+                }
+            }
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Translate a Mesa fragment program into R700 shader code.
+ *
+ * Runs register mapping, dependency analysis, instruction assembly and
+ * export processing in that order, then fills in nRegs, nParamExports and
+ * exportMode of fp->r700Shader and marks fp as translated.
+ *
+ * Returns GL_FALSE as soon as one of the steps reports failure (fp is then
+ * not marked as translated), GL_TRUE otherwise.
+ */
+GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
+                                      struct gl_fragment_program   *mesa_fp)
+{
+    GLuint    nColorExports;
+    GLboolean bDepthWritten = GL_FALSE;
+    GLuint    unDepthBit;
+
+    /* Init_Program */
+    Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
+    Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp);
+
+    /* The three steps run left to right; the first failure aborts. */
+    if(   GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp)
+       || GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions,
+                                    &(mesa_fp->Base.Instructions[0]),
+                                    &(fp->r700AsmCode))
+       || GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode),
+                                               mesa_fp->Base.OutputsWritten) )
+    {
+        return GL_FALSE;
+    }
+
+    /* nRegs holds "registers used minus one", clamped at 0. */
+    if(0 == fp->r700AsmCode.number_used_registers)
+    {
+        fp->r700Shader.nRegs = 0;
+    }
+    else
+    {
+        fp->r700Shader.nRegs = fp->r700AsmCode.number_used_registers - 1;
+    }
+
+    fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
+
+    /* Split the colour+Z export count into colour count and a Z flag. */
+    nColorExports = fp->r700AsmCode.number_of_colorandz_exports;
+
+    unDepthBit = 1 << FRAG_RESULT_DEPTH;
+    if(mesa_fp->Base.OutputsWritten & unDepthBit)
+    {
+        bDepthWritten = GL_TRUE;
+        nColorExports--;
+    }
+
+    /* exportMode: colour count in the upper bits, Z flag in bit 0. */
+    fp->r700Shader.exportMode = (nColorExports << 1) | bDepthWritten;
+
+    fp->translated = GL_TRUE;
+
+    return GL_TRUE;
+}
+
+GLboolean r700SetupFragmentProgram(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);   
+    
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+
+    struct r700_fragment_program *fp = (struct r700_fragment_program *)
+                                          (ctx->FragmentProgram._Current);
+
+    struct gl_program_parameter_list *paramList;
+    unsigned int unNumParamData;
+    unsigned int ui;
+
+    unsigned int unNumOfReg;
+    
+    if(GL_FALSE == fp->loaded)
+    {
+        if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
+           {
+                   Assemble( &(fp->r700Shader) );
+           }
+/* to be enabled */
+#if 0
+        /* Load fp to gpu */
+        (context->chipobj.EmitShader)(ctx, 
+                       &(fp->shadercode), 
+                       (GLvoid *)(fp->r700Shader.pProgram),
+                       fp->r700Shader.uShaderBinaryDWORDSize);                                         
+#endif
+        fp->loaded = GL_TRUE;
+    }
+
+    DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
+                 fp->r700Shader.uShaderBinaryDWORDSize);
+
+    /* TODO : enable this after MemUse fixed *=
+    (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
+    */
+/* to be enabled */
+#if 0
+    r700->SQ_PGM_START_PS.u32All     = (fp->shadercode.aos_offset >> 8) & 0x00FFFFFF;
+#endif
+
+    unNumOfReg = fp->r700Shader.nRegs + 1;
+
+    ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
+
+    ui = ui ? unNumOfReg : ui;
+
+    SETfield(r700->SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); 
+    
+    CLEARbit(r700->SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
+
+    if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
+       {
+        SETfield(r700->SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
+                 STACK_SIZE_shift, STACK_SIZE_mask);
+    }
+
+    SETfield(r700->SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
+             EXPORT_MODE_shift, EXPORT_MODE_mask);
+
+    if(fp->r700Shader.killIsUsed)
+    {
+           SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+    }
+    else
+    {
+        CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+    }
+
+    if(fp->r700Shader.depthIsExported)
+    {
+           SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); 
+    }
+    else
+    {
+        CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+    }
+
+    /* sent out shader constants. */
+
+    paramList = fp->mesa_program.Base.Parameters;
+
+    if(NULL != paramList)
+    {
+        _mesa_load_state_parameters(ctx, paramList);
+/* to be enabled */
+#if 0
+        unNumParamData = paramList->NumParameters * 4;
+        R700_CMDBUF_CHECK_SPACE(2 + unNumParamData);
+
+        R700EP3(context, IT_SET_ALU_CONST, unNumParamData);
+        /* assembler map const from very beginning. */
+        R700E32(context, SQ_ALU_CONSTANT_PS_OFFSET * 4);
+
+        unNumParamData = paramList->NumParameters;
+
+        for(ui=0; ui<unNumParamData; ui++)
+        {
+            R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][0])));
+            R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][1])));
+            R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][2])));
+            R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][3])));
+        }
+#endif
+    }
+
+    return GL_TRUE;
+}
+
+
+
index c19ad181e423c6d356b389b9cdca33d401e9885a..649ee4822d3105c75fc1894ab78c5892bb486310 100644 (file)
@@ -1,62 +1,63 @@
 /*
- * Copyright (C) 2005 Ben Skeggs.
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
  *
- * All Rights Reserved.
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
  *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
  *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /*
  * Authors:
- *   Ben Skeggs <darktama@iinet.net.au>
- *   Jerome Glisse <j.glisse@gmail.com>
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
  */
-#ifndef __R700_FRAGPROG_H_
-#define __R700_FRAGPROG_H_
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
+#ifndef _R700_FRAGPROG_H_
+#define _R700_FRAGPROG_H_
 
 #include "r600_context.h"
-#include "r600_state.h"
-#include "radeon_program.h"
+#include "r700_assembler.h"
+
+struct r700_fragment_program
+{
+       struct gl_fragment_program mesa_program; /* must stay the first member: Mesa program pointers are cast to this struct */
 
-struct r500_fragment_program;
+    r700_AssemblerBase r700AsmCode; /* assembler state, set up by Init_r700_AssemblerBase() and Map_Fragment_Program() */
+       R700_Shader        r700Shader; /* resulting shader: nRegs, exportMode, pProgram, ... */
 
-extern void r500TranslateFragmentShader(r600ContextPtr r600,
-                                       struct r500_fragment_program *fp);
+       GLboolean translated; /* set to GL_TRUE at the end of r700TranslateFragmentShader() */
+    GLboolean loaded; /* set to GL_TRUE by r700SetupFragmentProgram() on first setup */
+       GLboolean error; /* NOTE(review): not written by the code in r700_fragprog.c -- confirm users */
+/* to be enabled */
+#if 0
+    struct r600_dma_region shadercode;
+#endif
 
-struct r500_fragment_program_compiler {
-       r600ContextPtr r600;
-       struct r500_fragment_program *fp;
-       struct r500_fragment_program_code *code;
-       struct gl_program *program;
+       GLboolean WritesDepth; /* NOTE(review): not written by the code in r700_fragprog.c -- confirm users */
+       GLuint optimization; /* NOTE(review): not written by the code in r700_fragprog.c -- confirm users */
 };
 
-extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler);
+/* Internal */
+void Map_Fragment_Program(r700_AssemblerBase         *pAsm, /* assigns input, temporary and export register numbers in pAsm */
+                                                 struct gl_fragment_program *mesa_fp);
+GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, /* builds fp->r700AsmCode.pInstDeps */
+                                                                  struct gl_fragment_program   *mesa_fp);
 
-#endif
+/* Interface */
+/* Translates mesa_fp into R700 code stored in fp; returns GL_FALSE if a translation step fails. */
+extern GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
+                                             struct gl_fragment_program   *mesa_fp);
+extern GLboolean r700SetupFragmentProgram(GLcontext * ctx);
+
+#endif /*_R700_FRAGPROG_H_*/
diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.c b/src/mesa/drivers/dri/r600/r700_ioctl.c
new file mode 100644 (file)
index 0000000..0541dc7
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "swrast/swrast.h"
+
+#include "radeon_common.h"
+#include "radeon_lock.h"
+#include "r600_context.h"
+
+#include "r700_ioctl.h"
+#include "r700_clear.h"
+
+/* Install the Clear, Finish and Flush driver hooks for R700. */
+void r700InitIoctlFuncs(struct dd_function_table *functions)
+{
+    functions->Flush  = radeonFlush;
+    functions->Finish = radeonFinish;
+    functions->Clear  = r700Clear;
+}
diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.h b/src/mesa/drivers/dri/r600/r700_ioctl.h
new file mode 100644 (file)
index 0000000..414dc3e
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef __R700_IOCTL_H__
+#define __R700_IOCTL_H__
+
+#include "r600_context.h"
+#include "radeon_drm.h"
+
+extern void r700InitIoctlFuncs(struct dd_function_table *functions); /* sets the Clear, Finish and Flush hooks */
+
+#endif                         /* __R700_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c
new file mode 100644 (file)
index 0000000..ee63776
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <string.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+
+#include "shader/program.h"
+#include "tnl/tnl.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+#include "r700_oglprog.h"
+
+#if 0 /* to be enabled */
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+#endif /* to be enabled */
+
+static struct gl_program *r700NewProgram(GLcontext * ctx, 
+                                         GLenum target,
+                                                            GLuint id)
+{
+       struct gl_program *pProgram = NULL;
+#if 0 /* to be enabled */
+    struct r700_vertex_program *vp;
+       struct r700_fragment_program *fp;
+
+    switch (target) 
+    {
+    case GL_VERTEX_STATE_PROGRAM_NV:
+    case GL_VERTEX_PROGRAM_ARB:            
+        vp       = CALLOC_STRUCT(r700_vertex_program);
+           pProgram = _mesa_init_vertex_program(ctx, 
+                                             &vp->mesa_program,
+                                                                target, 
+                                             id);
+        vp->translated = GL_FALSE;
+        vp->loaded     = GL_FALSE;
+        vp->shadercode.buf = NULL;
+           break;
+    case GL_FRAGMENT_PROGRAM_NV:
+    case GL_FRAGMENT_PROGRAM_ARB:
+               fp       = CALLOC_STRUCT(r700_fragment_program);
+               pProgram = _mesa_init_fragment_program(ctx, 
+                                               &fp->mesa_program,
+                                                                      target, 
+                                               id);
+        fp->translated = GL_FALSE;
+        fp->loaded     = GL_FALSE;
+        fp->shadercode.buf = NULL;
+           break;
+    default:
+           _mesa_problem(ctx, "Bad target in r700NewProgram");
+    }
+#endif /* to be enabled */
+       return pProgram;
+}
+
+/*
+ * DeleteProgram driver hook.
+ *
+ * The whole body is still compiled out, so the function currently does
+ * nothing.  Once enabled it releases the shader's DMA region, cleans up
+ * the assembler and shader state of the r700 wrapper and then hands the
+ * program to _mesa_delete_program().
+ */
+static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+#if 0 /* to be enabled */
+    struct r700_vertex_program   * vp;
+    struct r700_fragment_program * fp;
+    context_t *context = R700_CONTEXT(ctx);
+
+    switch (prog->Target)
+    {
+    case GL_VERTEX_STATE_PROGRAM_NV:
+    case GL_VERTEX_PROGRAM_ARB:
+        vp = (struct r700_vertex_program*)prog;
+        /* Release DMA region */
+        (context->chipobj.FreeDmaRegion)(context, &(vp->shadercode));
+        /* Clean up */
+        Clean_Up_Assembler(&(vp->r700AsmCode));
+        Clean_Up_Shader(&(vp->r700Shader));
+        break;
+    case GL_FRAGMENT_PROGRAM_NV:
+    case GL_FRAGMENT_PROGRAM_ARB:
+        fp = (struct r700_fragment_program*)prog;
+        /* Release DMA region */
+        (context->chipobj.FreeDmaRegion)(context, &(fp->shadercode));
+        /* Clean up */
+        Clean_Up_Assembler(&(fp->r700AsmCode));
+        Clean_Up_Shader(&(fp->r700Shader));
+        break;
+    default:
+        /* The message used to name r700NewProgram (copy-paste slip). */
+        _mesa_problem(ctx, "Bad target in r700DeleteProgram");
+    }
+
+    _mesa_delete_program(ctx, prog);
+#endif /* to be enabled */
+}
+
+/* Empty placeholder for the ProgramStringNotify driver hook. */
+static void
+r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+    (void)ctx;
+    (void)target;
+    (void)prog;
+}
+
+/* IsProgramNative driver hook: reports GL_TRUE for every program. */
+static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+    (void)ctx;
+    (void)target;
+    (void)prog;
+
+    return GL_TRUE;
+}
+
+/* Install the R700 program-management hooks into the driver function table. */
+void r700InitShaderFuncs(struct dd_function_table *functions)
+{
+    functions->IsProgramNative     = r700IsProgramNative;
+    functions->ProgramStringNotify = r700ProgramStringNotify;
+    functions->DeleteProgram       = r700DeleteProgram;
+    functions->NewProgram          = r700NewProgram;
+}
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.h b/src/mesa/drivers/dri/r600/r700_oglprog.h
new file mode 100644 (file)
index 0000000..fe2e9d1
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_OGLPROG_H_
+#define _R700_OGLPROG_H_
+#include "r600_context.h"
+
+extern void r700InitShaderFuncs(struct dd_function_table *functions); /* sets NewProgram, DeleteProgram, ProgramStringNotify and IsProgramNative */
+
+#endif /*_R700_OGLPROG_H_*/
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
new file mode 100644 (file)
index 0000000..fce99ad
--- /dev/null
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+#include "tnl/t_pipeline.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+
+/* to be enabled
+#include "r700_vertprog.h"
+#include "r700_fragprog.h"
+#include "r700_state.h"
+#include "r700_tex.h"
+#include "r700_emit.h"
+*/
+
+#if 0 // to be enable
+/*
+ * Emit one SET_CONFIG_REG packet that writes (1 << 15) to WAIT_UNTIL.
+ * NOTE(review): the meaning of bit 15 is not visible here -- presumably a
+ * "wait until idle" condition; confirm against the register headers.
+ */
+void r700WaitForIdle(context_t *context)
+{
+    R700EP3 (context, IT_SET_CONFIG_REG, 1);
+    R700E32 (context, mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
+    R700E32 (context, 1 << 15);
+}
+
+/*
+ * Emit an EVENT_WRITE packet carrying event 0x16, followed by a
+ * SET_CONFIG_REG packet that writes (1 << 17) to WAIT_UNTIL.
+ * NOTE(review): 0x16 and bit 17 are unnamed magic values -- presumably a
+ * cache flush event and a "wait until idle and clean" condition; confirm
+ * against the register headers.
+ */
+void r700WaitForIdleClean(context_t *context)
+{
+    R700EP3 (context, IT_EVENT_WRITE, 0);
+    R700E32 (context, 0x16);
+
+    R700EP3 (context, IT_SET_CONFIG_REG, 1);
+    R700E32 (context, mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
+    R700E32 (context, 1 << 17); 
+}
+
+/*
+ * Emit the packets that open a 3D command stream: START_3D_CMDBUF (only
+ * for chip types up to and including CHIP_TYPE_RV670), CONTEXT_CONTROL
+ * with both payload words set to 0x80000000, and finally the
+ * r700WaitForIdleClean() sequence.
+ * NOTE(review): unlike the other emitters in this file, no
+ * R700_CMDBUF_CHECK_SPACE() reservation is made here -- confirm whether
+ * the caller is expected to reserve space.
+ */
+static void r700Start3D(context_t *context)
+{
+    if (context->screen->chip.type <= CHIP_TYPE_RV670)
+    {
+        R700EP3 (context, IT_START_3D_CMDBUF, 1);
+        R700E32 (context, 0);
+    }
+
+    R700EP3 (context, IT_CONTEXT_CONTROL, 1);
+    R700E32 (context, 0x80000000);
+    R700E32 (context, 0x80000000);
+    r700WaitForIdleClean(context);
+}
+
+
+/*
+ * Upload every vertex attribute read by the current vertex program and
+ * program the matching vertex fetch constants.
+ *
+ * Two SET_CTL_CONST packets first zero SQ_VTX_BASE_VTX_LOC and
+ * SQ_VTX_START_INST_LOC.  Then, for each attribute whose bit is set in the
+ * program's InputsRead mask, the attribute array is handed to the chip
+ * object's EmitVec hook, the resulting slot in context->aos[] is recorded
+ * and r700SetupVTXConstans() is called for it.  Unused aos[] slots get
+ * their buf pointer cleared.
+ *
+ * Returns R600_FALLBACK_NONE; no failure path exists yet.
+ */
+static int r700SetupStreams(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    struct r700_vertex_program *vpc
+        = (struct r700_vertex_program *)ctx->VertexProgram._Current;
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *vb = &tnl->vb;
+    unsigned int unBit;
+    unsigned int i;
+
+    R700_CMDBUF_CHECK_SPACE(6);
+    R700EP3 (context, IT_SET_CTL_CONST, 1);
+    R700E32 (context, mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R700E32 (context, 0);
+
+    R700EP3 (context, IT_SET_CTL_CONST, 1);
+    R700E32 (context, mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R700E32 (context, 0);
+
+    context->aos_count = 0;
+    for(i=0; i<VERT_ATTRIB_MAX; i++)
+    {
+        /* Unsigned shift: a signed "1 << i" is undefined once i reaches
+         * the sign bit of int. */
+        unBit = 1u << i;
+        if(vpc->mesa_program.Base.InputsRead & unBit)
+        {
+            (context->chipobj.EmitVec)(ctx,
+                                       &(context->aos[context->aos_count]),
+                                       vb->AttribPtr[i]->data,
+                                       vb->AttribPtr[i]->size,
+                                       vb->AttribPtr[i]->stride,
+                                       vb->Count);
+
+            context->aos[context->aos_count].aos_size = vb->AttribPtr[i]->size;
+
+            /* currently aos are packed */
+            r700SetupVTXConstans(ctx,
+                                 i,
+                                 (unsigned int)context->aos[context->aos_count].aos_offset,
+                                 (unsigned int)vb->AttribPtr[i]->size,
+                                 (unsigned int)(vb->AttribPtr[i]->size * 4),
+                                 (unsigned int)vb->Count);
+            /* TODO : enable this after MemUse fixed
+            (context->chipobj.MemUse)(context, context->aos[context->aos_count].buf->id);
+            */
+
+            context->aos_count++;
+        }
+    }
+
+    /* Mark the remaining slots as unused. */
+    for(i=context->aos_count; i<VERT_ATTRIB_MAX; i++)
+    {
+        context->aos[i].buf = NULL;
+    }
+
+    return R600_FALLBACK_NONE;
+}
+
+/*
+ * Reset the PS/VS program resource registers (leaving only
+ * PRIME_CACHE_ON_DRAW set), let the vertex and fragment program setup
+ * routines fill in the rest, and derive CB_SHADER_CONTROL from the pixel
+ * shader's export mode field.  Always returns GL_TRUE.
+ */
+static GLboolean r700SetupShaders(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+    GLuint nExports;
+
+    /* Pixel shader resources. */
+    r700->SQ_PGM_RESOURCES_PS.u32All = 0;
+    SETbit(r700->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+
+    /* Vertex shader resources. */
+    r700->SQ_PGM_RESOURCES_VS.u32All = 0;
+    SETbit(r700->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+
+    r700SetupVertexProgram(ctx);
+    r700SetupFragmentProgram(ctx);
+
+    /* Extract the EXPORT_MODE field and enable that many low bits. */
+    nExports = (r700->SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
+    r700->CB_SHADER_CONTROL.u32All = (1 << nExports) - 1;
+
+    return GL_TRUE;
+}
+
+/*
+ * Emit the texture resource and sampler state of every populated texture
+ * unit.  For each unit with a non-null textures[] entry a SET_RESOURCE
+ * packet carrying SQ_TEX_RESOURCE0..6 is written; for each unit with a
+ * non-null samplers[] entry a SET_SAMPLER packet carrying
+ * SQ_TEX_SAMPLER0..2 is written.  Always returns GL_TRUE.
+ */
+GLboolean r700SendTextureState(context_t *context)
+{
+    unsigned int i;
+
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+
+    for(i=0; i<R700_TEXTURE_NUMBERUNITS; i++)
+    {
+        if(r700->texture_states.textures[i] != 0)
+        {
+            /* header + offset + 7 resource words */
+            R700_CMDBUF_CHECK_SPACE(9);
+            R700EP3 (context, IT_SET_RESOURCE, 7);
+            /* seven words are written per unit, so units are 7 apart */
+            R700E32 (context, i * 7);
+            R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE0.u32All);
+            R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE1.u32All);
+            R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE2.u32All);
+            R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE3.u32All);
+            R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE4.u32All);
+            R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE5.u32All);
+            R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE6.u32All);
+        }
+
+        if(r700->texture_states.samplers[i] != 0)
+        {
+            /* header + offset + 3 sampler words */
+            R700_CMDBUF_CHECK_SPACE(5);
+            R700EP3 (context, IT_SET_SAMPLER, 3);        
+            R700E32 (context, i * 3);   // Base at 0x7000
+            R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER0.u32All);
+            R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER1.u32All);
+            R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER2.u32All);
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Emit a SURFACE_SYNC packet with the TC, VC, CB, DB, SH and SMX action
+ * enables set, a size word of 0xFFFFFFFF, a base word of 0 and a final
+ * word of 10.  Always returns GL_TRUE.
+ */
+GLboolean r700SyncSurf(context_t *context)
+{
+    /* TODO : too heavy? */
+    const unsigned int coherCntl = TC_ACTION_ENA_bit
+                                 | VC_ACTION_ENA_bit
+                                 | CB_ACTION_ENA_bit
+                                 | DB_ACTION_ENA_bit
+                                 | SH_ACTION_ENA_bit
+                                 | SMX_ACTION_ENA_bit;
+
+    /* header + 4 payload words */
+    R700_CMDBUF_CHECK_SPACE(5);
+    R700EP3(context, IT_SURFACE_SYNC, 3);
+    R700E32(context, coherCntl);
+    R700E32(context, 0xFFFFFFFF);
+    R700E32(context, 0x00000000);
+    R700E32(context, 10);
+
+    return GL_TRUE;
+}
+
+/*
+ * Point colour buffer 0 at the current render target by storing the
+ * target's GPU address, shifted right by 8 bits, in CB_COLOR0_BASE.
+ */
+static void r700SetRenderTarget(context_t *context)
+{
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+
+    r700->CB_COLOR0_BASE.u32All = context->target.rt.gpu >> 8;
+}
+
+/*
+ * Map a GL primitive mode (after masking with PRIM_MODE_MASK) to the
+ * corresponding DI_PT_* value.  Unknown modes trip an assertion and, when
+ * assertions are compiled out, yield -1 converted to unsigned int.
+ */
+unsigned int r700PrimitiveType(int prim)
+{
+    switch (prim & PRIM_MODE_MASK)
+    {
+    case GL_POINTS:         return DI_PT_POINTLIST;
+    case GL_LINES:          return DI_PT_LINELIST;
+    case GL_LINE_STRIP:     return DI_PT_LINESTRIP;
+    case GL_LINE_LOOP:      return DI_PT_LINELOOP;
+    case GL_TRIANGLES:      return DI_PT_TRILIST;
+    case GL_TRIANGLE_STRIP: return DI_PT_TRISTRIP;
+    case GL_TRIANGLE_FAN:   return DI_PT_TRIFAN;
+    case GL_QUADS:          return DI_PT_QUADLIST;
+    case GL_QUAD_STRIP:     return DI_PT_QUADSTRIP;
+    case GL_POLYGON:        return DI_PT_POLYGON;
+    default:
+        break;
+    }
+
+    /* Not a primitive mode this driver knows about. */
+    assert(0);
+    return -1;
+}
+
+#endif // to be enable
+
+/*
+ * Pipeline "run" callback that draws the TNL vertex buffer on the
+ * hardware.
+ *
+ * The whole body is currently compiled out ("#if 0"), so the function
+ * only returns GL_FALSE.  When enabled it: starts the 3D stream, syncs
+ * surfaces, updates shaders, sets the render target, uploads the vertex
+ * streams, sends texture and context state, and then emits one
+ * DRAW_INDEX_IMMD packet per primitive with the trivial index list
+ * 0..count-1.  Finally it flushes the command buffer and releases the
+ * vertex DMA regions.
+ *
+ * Returns GL_TRUE when a fallback is needed (stream setup or fragment
+ * shader translation failed), GL_FALSE otherwise.  The "stage" argument
+ * is unused.
+ */
+static GLboolean r700RunRender(GLcontext * ctx,
+                               struct tnl_pipeline_stage *stage)
+{
+#if 0 // to be enable
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+    unsigned int i, j;
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *vb = &tnl->vb;
+
+    struct r700_fragment_program *fp = (struct r700_fragment_program *)
+                                          (ctx->FragmentProgram._Current);
+    if (context->screen->chip.type <= CHIP_TYPE_RV670)
+    {
+        fp->r700AsmCode.bR6xx = 1;
+    }
+
+    r700Start3D(context); /* TODO : this is too much. */
+
+    r700SyncSurf(context); /* TODO : make it light. */
+
+    r700UpdateShaders(ctx);
+
+    r700SetRenderTarget(context);
+
+    if(r700SetupStreams(ctx))
+    {
+        return GL_TRUE;
+    }
+
+    r700UpdateTextureState(context);
+    r700SendTextureState(context);
+
+    if(GL_FALSE == fp->translated)
+    {
+        if( GL_FALSE == r700TranslateFragmentShader(fp, &(fp->mesa_program)) )
+        {
+            /* NOTE(review): the aos regions uploaded by r700SetupStreams()
+             * are not released on this path -- confirm whether that is
+             * intended. */
+            return GL_TRUE;
+        }
+    }
+
+    r700SetupShaders(ctx);
+
+    /* set a valid base address to make the command checker happy */
+    r700->SQ_PGM_START_FS.u32All     = r700->SQ_PGM_START_PS.u32All;
+    r700->SQ_PGM_START_ES.u32All     = r700->SQ_PGM_START_PS.u32All;
+    r700->SQ_PGM_START_GS.u32All     = r700->SQ_PGM_START_PS.u32All;
+
+    r700SendContextStates(context);
+
+    /* richard test code */
+    for (i = 0; i < vb->PrimitiveCount; i++)
+    {
+        GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
+        GLuint numIndices = vb->Primitive[i].count;
+        GLuint numEntries;
+
+        unsigned int VGT_DRAW_INITIATOR = 0;
+        unsigned int VGT_INDEX_TYPE     = 0;
+        unsigned int VGT_PRIMITIVE_TYPE = 0;
+        unsigned int VGT_NUM_INDICES    = 0;
+
+        /* Every packet costs one header dword plus its payload.  The test
+         * stamp is header + register + value = 3 dwords, the same shape
+         * as the VGT_PRIMITIVE_TYPE write. */
+        numEntries = 2 /* VGT_INDEX_TYPE */
+                     + 3 /* VGT_PRIMITIVE_TYPE */
+                     + numIndices + 3 /* DRAW_INDEX_IMMD */
+                     + 3; /* test stamp */
+
+        R700_CMDBUF_CHECK_SPACE(numEntries);
+
+        VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift;
+
+        R700EP3(context, IT_INDEX_TYPE, 0);
+        R700E32(context, VGT_INDEX_TYPE);
+
+        VGT_NUM_INDICES = numIndices;
+
+        VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift;
+        R700EP3(context, IT_SET_CONFIG_REG, 1);
+        R700E32(context, mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
+        R700E32(context, VGT_PRIMITIVE_TYPE);
+
+        VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift;
+        VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift;
+
+        R700EP3(context, IT_DRAW_INDEX_IMMD, (numIndices + 1));
+        R700E32(context, VGT_NUM_INDICES);
+        R700E32(context, VGT_DRAW_INITIATOR);
+
+        /* Immediate index list: 0, 1, ..., numIndices - 1. */
+        for (j=0; j<numIndices; j++)
+        {
+            R700E32(context, j);
+        }
+
+        /* test stamp, write a number to mmSCRATCH4 */
+        R700EP3(context, IT_SET_CONFIG_REG, 1);
+        R700E32(context, 0x2144 - 0x2000);
+        R700E32(context, 0x12341234);
+    }
+
+    /* Flush render op cached for last several quads. */
+    R700_CMDBUF_CHECK_SPACE(2);
+    R700EP3 (context, IT_EVENT_WRITE, 0);
+    R700E32 (context, CACHE_FLUSH_AND_INV_EVENT);
+
+    (context->chipobj.FlushCmdBuffer)(context);
+
+    /* free aos => TODO : cache mgr */
+    for (i = 0; i < context->aos_count; i++)
+    {
+        (context->chipobj.FreeDmaRegion)(context, &(context->aos[i]));
+    }
+#endif // to be enable
+    return GL_FALSE;
+}
+
+/*
+ * "run" callback of the non-TCL render stage.  Placeholder: performs no
+ * rendering and unconditionally returns GL_TRUE; both arguments are
+ * unused.
+ */
+static GLboolean r700RunNonTCLRender(GLcontext * ctx,
+                                     struct tnl_pipeline_stage *stage)
+{
+    return GL_TRUE;
+}
+
+/*
+ * "run" callback of the hardware TCL stage: hands the draw to
+ * r700RunRender() and forwards its result.
+ *
+ * GL_FALSE stops _tnl_run_pipeline from running the remaining stages.
+ * The render here DOES finish the whole pipe, so GL_FALSE is the value
+ * returned on success.
+ */
+static GLboolean r700RunTCLRender(GLcontext * ctx,
+                                  struct tnl_pipeline_stage *stage)
+{
+#if 0 // to be enable
+    context_t *context = R700_CONTEXT(ctx);
+
+    r700UpdateShaders(ctx);
+#endif // to be enable
+
+    return r700RunRender(ctx, stage);
+}
+
+/*
+ * Pipeline stage descriptor for hardware rasterization.  Only the name
+ * and the final callback (r700RunNonTCLRender) are set.
+ * NOTE(review): the four NULL members are presumably the stage's optional
+ * private data and create/destroy/validate hooks -- confirm against
+ * struct tnl_pipeline_stage in tnl/t_context.h.
+ */
+const struct tnl_pipeline_stage _r700_render_stage = {
+       "r700 Hardware Rasterization",
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       r700RunNonTCLRender
+};
+
+/*
+ * Pipeline stage descriptor for hardware transform, clipping and
+ * lighting.  Only the name and the final callback (r700RunTCLRender) are
+ * set.
+ * NOTE(review): the four NULL members are presumably the stage's optional
+ * private data and create/destroy/validate hooks -- confirm against
+ * struct tnl_pipeline_stage in tnl/t_context.h.
+ */
+const struct tnl_pipeline_stage _r700_tcl_stage = {
+       "r700 Hardware Transform, Clipping and Lighting",
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       r700RunTCLRender
+};
+
+/*
+ * NULL-terminated list of TNL pipeline stages used by this driver.  The
+ * hardware TCL stage comes first, followed by the software TNL stages,
+ * then the driver's render stage and the generic TNL render stage.
+ */
+const struct tnl_pipeline_stage *r700_pipeline[] =
+{
+    /* hardware path */
+    &_r700_tcl_stage,
+
+    /* software transform and lighting */
+    &_tnl_vertex_transform_stage,
+    &_tnl_normal_transform_stage,
+    &_tnl_lighting_stage,
+    &_tnl_fog_coordinate_stage,
+    &_tnl_texgen_stage,
+    &_tnl_texture_transform_stage,
+    &_tnl_vertex_program_stage,
+
+    /* rasterization */
+    &_r700_render_stage,
+    &_tnl_render_stage,
+
+    NULL
+};
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c
new file mode 100644 (file)
index 0000000..446f9f9
--- /dev/null
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+
+#include "main/glheader.h"
+
+#include "r600_context.h"
+#include "r700_chip.h"
+#include "r700_debug.h"
+
+#include "r700_shader.h"
+
+/* Shader subsystem initialisation hook; currently an empty stub that
+ * ignores ctx. */
+void r700ShaderInit(GLcontext * ctx)
+{
+}
+
+/*
+ * Append pInst to the tail of the singly linked instruction list
+ * plstCFInstructions and bump the list's node count.  An empty list is
+ * recognised by a NULL tail pointer.
+ */
+void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst)
+{
+    R700ShaderInstruction *pPrevTail = plstCFInstructions->pTail;
+
+    if(NULL != pPrevTail)
+    {
+        /* non-empty list: hook the node behind the old tail */
+        pPrevTail->pNextInst = pInst;
+    }
+    else
+    {
+        /* empty list: the node is also the head */
+        plstCFInstructions->pHead = pInst;
+    }
+    plstCFInstructions->pTail = pInst;
+
+    /* the new node is always the last one */
+    pInst->pNextInst = NULL;
+
+    plstCFInstructions->uNumOfNode += 1;
+}
+
+/*
+ * Put a shader object into its pristine state: invalid type, no program
+ * binary, all flags cleared (except bLinksDirty, which starts out set),
+ * all counters and offsets zero and all four instruction lists empty.
+ *
+ * The uEndOfCF / uEndOfALU / uEndOfFetch marks, which Assemble() later
+ * computes, are zeroed as well so they never hold indeterminate values.
+ */
+void Init_R700_Shader(R700_Shader * pShader)
+{
+    /* Identity and program binary. */
+    pShader->Type = R700_SHADER_INVALID;
+    pShader->pProgram = NULL;
+    pShader->bBinaryShader = GL_FALSE;
+    pShader->bFetchShaderRequired = GL_FALSE;
+    pShader->bNeedsAssembly = GL_FALSE;
+    pShader->bLinksDirty = GL_TRUE;
+    pShader->uShaderBinaryDWORDSize = 0;
+
+    /* Resource usage. */
+    pShader->nRegs = 0;
+    pShader->nParamExports = 0;
+    pShader->nMemExports = 0;
+    pShader->resource = 0;
+
+    pShader->exportMode = 0;
+    pShader->depthIsImported = GL_FALSE;
+
+    /* Export flags. */
+    pShader->positionVectorIsExported = GL_FALSE;
+    pShader->miscVectorIsExported = GL_FALSE;
+    pShader->renderTargetArrayIndexIsExported = GL_FALSE;
+    pShader->ccDist0VectorIsExported = GL_FALSE;
+    pShader->ccDist1VectorIsExported = GL_FALSE;
+
+    pShader->depthIsExported = GL_FALSE;
+    pShader->stencilRefIsExported = GL_FALSE;
+    pShader->coverageToMaskIsExported = GL_FALSE;
+    pShader->maskIsExported = GL_FALSE;
+    pShader->killIsUsed = GL_FALSE;
+
+    /* Layout information. */
+    pShader->uCFOffset = 0;
+    pShader->uStackSize = 0;
+    pShader->uMaxCallDepth = 0;
+    pShader->uEndOfCF = 0;
+    pShader->uEndOfALU = 0;
+    pShader->uEndOfFetch = 0;
+
+    pShader->bSurfAllocated = GL_FALSE;
+
+    /* Empty instruction lists. */
+    pShader->lstCFInstructions.pHead = NULL;
+    pShader->lstCFInstructions.pTail = NULL;
+    pShader->lstCFInstructions.uNumOfNode = 0;
+    pShader->lstALUInstructions.pHead = NULL;
+    pShader->lstALUInstructions.pTail = NULL;
+    pShader->lstALUInstructions.uNumOfNode = 0;
+    pShader->lstTEXInstructions.pHead = NULL;
+    pShader->lstTEXInstructions.pTail = NULL;
+    pShader->lstTEXInstructions.uNumOfNode = 0;
+    pShader->lstVTXInstructions.pHead = NULL;
+    pShader->lstVTXInstructions.pTail = NULL;
+    pShader->lstVTXInstructions.uNumOfNode = 0;
+}
+
+/*
+ * Append a control flow instruction to the shader.
+ *
+ * The instruction receives its index within the CF list, the shader's
+ * binary size grows by the instruction's size, and export clauses update
+ * the export counters: an SX clause of type SQ_EXPORT_PARAM adds
+ * burst_count + 1 to nParamExports, an SMX MEM_RING clause of type
+ * SQ_EXPORT_WRITE or SQ_EXPORT_WRITE_IND adds burst_count + 1 to
+ * nMemExports.  The shader is marked as needing link resolution and
+ * assembly, and the instruction's use count is incremented.
+ */
+void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst)
+{
+    R700ControlFlowSXClause  *pSX;
+    R700ControlFlowSMXClause *pSMX;
+
+    pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode;
+    AddInstToList(&(pShader->lstCFInstructions),
+                  (R700ShaderInstruction*)pCFInst);
+    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType);
+
+    switch (pCFInst->m_ShaderInstType)
+    {
+    case SIT_CF_ALL_EXP_SX:
+        pSX = (R700ControlFlowSXClause*)pCFInst;
+        if(pSX->m_Word0.f.type == SQ_EXPORT_PARAM)
+        {
+            pShader->nParamExports += pSX->m_Word1.f.burst_count + 1;
+        }
+        break;
+    case SIT_CF_ALL_EXP_SMX:
+        pSMX = (R700ControlFlowSMXClause*)pCFInst;
+        if((pSMX->m_Word1.f.cf_inst == SQ_CF_INST_MEM_RING) &&
+           (pSMX->m_Word0.f.type == SQ_EXPORT_WRITE || pSMX->m_Word0.f.type == SQ_EXPORT_WRITE_IND))
+        {
+            pShader->nMemExports += pSMX->m_Word1.f.burst_count + 1;
+        }
+        break;
+    default:
+        /* other CF instructions do not export anything */
+        break;
+    }
+
+    pShader->bLinksDirty    = GL_TRUE;
+    pShader->bNeedsAssembly = GL_TRUE;
+
+    pCFInst->useCount++;
+}
+
+/*
+ * Append a vertex fetch instruction to the shader: assign its index in
+ * the VTX list, grow the binary size and, for generic fetches, raise
+ * nRegs to the destination GPR if that is larger.  Marks the shader as
+ * needing link resolution and assembly and increments the instruction's
+ * use count.
+ */
+void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst)
+{
+    R700VertexGenericFetch *pGenericFetch;
+
+    pVTXInst->m_uIndex = pShader->lstVTXInstructions.uNumOfNode;
+    AddInstToList(&(pShader->lstVTXInstructions),
+                  (R700ShaderInstruction*)pVTXInst);
+    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pVTXInst->m_ShaderInstType);
+
+    if(SIT_VTX_GENERIC == pVTXInst->m_ShaderInstType)
+    {
+        pGenericFetch = (R700VertexGenericFetch*)pVTXInst;
+        if(pShader->nRegs < pGenericFetch->m_Word1_GPR.f.dst_gpr)
+        {
+            pShader->nRegs = pGenericFetch->m_Word1_GPR.f.dst_gpr;
+        }
+    }
+
+    pShader->bLinksDirty    = GL_TRUE;
+    pShader->bNeedsAssembly = GL_TRUE;
+
+    pVTXInst->useCount++;
+}
+
+/*
+ * Append a texture instruction to the shader: assign its index in the
+ * TEX list, grow the binary size and raise nRegs to the destination GPR
+ * if that is larger.  Marks the shader as needing link resolution and
+ * assembly and increments the instruction's use count.
+ */
+void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst)
+{
+    pTEXInst->m_uIndex = pShader->lstTEXInstructions.uNumOfNode;
+    AddInstToList(&(pShader->lstTEXInstructions),
+                  (R700ShaderInstruction*)pTEXInst);
+    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pTEXInst->m_ShaderInstType);
+
+    if(pShader->nRegs < pTEXInst->m_Word1.f.dst_gpr)
+    {
+        pShader->nRegs = pTEXInst->m_Word1.f.dst_gpr;
+    }
+
+    pShader->bLinksDirty    = GL_TRUE;
+    pShader->bNeedsAssembly = GL_TRUE;
+
+    pTEXInst->useCount++;
+}
+
+/*
+ * Append an ALU instruction to the shader: assign its index in the ALU
+ * list, grow the binary size and raise nRegs to the destination GPR if
+ * that is larger.  Marks the shader as needing link resolution and
+ * assembly and increments the instruction's use count.
+ */
+void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst)
+{
+    pALUInst->m_uIndex = pShader->lstALUInstructions.uNumOfNode;
+    AddInstToList(&(pShader->lstALUInstructions),
+                  (R700ShaderInstruction*)pALUInst);
+    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pALUInst->m_ShaderInstType);
+
+    if(pShader->nRegs < pALUInst->m_Word1.f.dst_gpr)
+    {
+        pShader->nRegs = pALUInst->m_Word1.f.dst_gpr;
+    }
+
+    pShader->bLinksDirty    = GL_TRUE;
+    pShader->bNeedsAssembly = GL_TRUE;
+
+    pALUInst->useCount++;
+}
+
+/*
+ * Compute where each ALU, TEX and VTX instruction will live in the
+ * assembled binary and patch that address into the control flow clause
+ * linked to it.
+ *
+ * The binary is laid out as CF | ALU | TEX | VTX.  Offsets are counted in
+ * dwords; the addr field written into a clause is the offset divided by
+ * two.  If TEX or VTX instructions exist and their section would not
+ * start on a multiple of four dwords, one freshly initialised ALU
+ * instruction is appended as padding.
+ *
+ * On success bLinksDirty is cleared.  If the padding instruction cannot
+ * be allocated the function returns early and leaves bLinksDirty set.
+ *
+ * NOTE(review): the CF section is assumed to start at offset 0 here,
+ * whereas Assemble() adds pShader->uCFOffset to its end marks -- confirm
+ * that this difference is intended.
+ */
+void ResolveLinks(R700_Shader *pShader)
+{
+    R700ShaderInstruction  *pInst;
+    R700ALUInstruction     *pALUinst;
+    R700TextureInstruction *pTEXinst;
+    R700VertexInstruction  *pVTXinst;
+    GLuint uiSize;
+    GLuint cfOffset = 0x0;
+    GLuint aluOffset;
+    GLuint texOffset;
+    GLuint vtxOffset;
+
+    aluOffset = cfOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
+
+    /* ALU instructions vary in size, so the section has to be summed. */
+    texOffset = aluOffset;
+    for(pInst = pShader->lstALUInstructions.pHead; NULL != pInst; pInst = pInst->pNextInst)
+    {
+        texOffset += GetInstructionSize(pInst->m_ShaderInstType);
+    }
+
+    vtxOffset = texOffset + pShader->lstTEXInstructions.uNumOfNode * GetInstructionSize(SIT_TEX);
+
+    if ( ((pShader->lstTEXInstructions.uNumOfNode > 0) && (texOffset % 4 != 0)) ||
+         ((pShader->lstVTXInstructions.uNumOfNode > 0) && (vtxOffset % 4 != 0))    )
+    {
+        /* Pad the ALU section with one extra instruction. */
+        pALUinst = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+        if(NULL == pALUinst)
+        {
+            /* Out of memory: links stay dirty rather than crashing in
+             * Init_R700ALUInstruction(). */
+            return;
+        }
+        Init_R700ALUInstruction(pALUinst);
+        AddALUInstruction(pShader, pALUinst);
+        texOffset += GetInstructionSize(SIT_ALU);
+        vtxOffset += GetInstructionSize(SIT_ALU);
+    }
+
+    /* ALU clauses. */
+    uiSize = 0;
+    for(pInst = pShader->lstALUInstructions.pHead; NULL != pInst; pInst = pInst->pNextInst)
+    {
+        pALUinst = (R700ALUInstruction*)pInst;
+
+        if(pALUinst->m_pLinkedALUClause != NULL)
+        {
+            // This address is quad-word aligned
+            pALUinst->m_pLinkedALUClause->m_Word0.f.addr = (aluOffset + uiSize) >> 1;
+        }
+
+        uiSize += GetInstructionSize(pALUinst->m_ShaderInstType);
+    }
+
+    /* TEX clauses. */
+    uiSize = 0;
+    for(pInst = pShader->lstTEXInstructions.pHead; NULL != pInst; pInst = pInst->pNextInst)
+    {
+        pTEXinst = (R700TextureInstruction*)pInst;
+
+        if (pTEXinst->m_pLinkedGenericClause != NULL)
+        {
+            pTEXinst->m_pLinkedGenericClause->m_Word0.f.addr = (texOffset + uiSize) >> 1;
+        }
+
+        uiSize += GetInstructionSize(pTEXinst->m_ShaderInstType);
+    }
+
+    /* VTX clauses. */
+    uiSize = 0;
+    for(pInst = pShader->lstVTXInstructions.pHead; NULL != pInst; pInst = pInst->pNextInst)
+    {
+        pVTXinst = (R700VertexInstruction*)pInst;
+
+        if (pVTXinst->m_pLinkedGenericClause != NULL)
+        {
+            pVTXinst->m_pLinkedGenericClause->m_Word0.f.addr = (vtxOffset + uiSize) >> 1;
+        }
+
+        uiSize += GetInstructionSize(pVTXinst->m_ShaderInstType);
+    }
+
+    pShader->bLinksDirty = GL_FALSE;
+}
+
+/*
+ * Serialise the shader's instruction lists into one freshly allocated
+ * dword buffer and install it as pShader->pProgram.
+ *
+ * Binary shaders are left alone.  Dirty links are resolved first.  The
+ * buffer holds uShaderBinaryDWORDSize zero-initialised dwords and is
+ * filled in the order CF, ALU, TEX, VTX; instructions of an unrecognised
+ * type are skipped.  TEX and VTX instructions are three words plus the
+ * filler word 0x0beadeaf.  Any previous program buffer is freed.
+ *
+ * Afterwards uEndOfCF, uEndOfALU and uEndOfFetch hold the end of the
+ * respective sections (dword offset divided by two, relative to
+ * uCFOffset) and bNeedsAssembly is cleared.
+ *
+ * If the buffer cannot be allocated the function returns without
+ * touching the shader, so the old program and bNeedsAssembly are kept.
+ */
+void Assemble(R700_Shader *pShader)
+{
+    GLuint *pShaderBinary;
+    GLuint *pCurrPos;
+    GLuint size_of_program;
+    GLuint end_of_cf_instructions;
+    GLuint number_of_alu_dwords;
+    R700ShaderInstruction *pInst;
+
+    if(GL_TRUE == pShader->bBinaryShader)
+    {
+        return;
+    }
+
+    if(pShader->bLinksDirty == GL_TRUE)
+    {
+        ResolveLinks(pShader);
+    }
+
+    size_of_program = pShader->uShaderBinaryDWORDSize;
+
+    pShaderBinary = (GLuint*) MALLOC(sizeof(GLuint)*size_of_program);
+    if(size_of_program > 0)
+    {
+        if(NULL == pShaderBinary)
+        {
+            /* Out of memory: keep the previous state. */
+            return;
+        }
+        memset(pShaderBinary, 0, sizeof(GLuint)*size_of_program);
+    }
+    pCurrPos = pShaderBinary;
+
+    /* Control flow section. */
+    for(pInst = pShader->lstCFInstructions.pHead; NULL != pInst; pInst = pInst->pNextInst)
+    {
+        switch (pInst->m_ShaderInstType)
+        {
+        case SIT_CF_GENERIC:
+            {
+                R700ControlFlowGenericClause* pCFgeneric = (R700ControlFlowGenericClause*)pInst;
+                *pCurrPos++ = pCFgeneric->m_Word0.val;
+                *pCurrPos++ = pCFgeneric->m_Word1.val;
+            }
+            break;
+        case SIT_CF_ALU:
+            {
+                R700ControlFlowALUClause* pCFalu = (R700ControlFlowALUClause*)pInst;
+                *pCurrPos++ = pCFalu->m_Word0.val;
+                *pCurrPos++ = pCFalu->m_Word1.val;
+            }
+            break;
+        case SIT_CF_ALL_EXP_SX:
+            {
+                R700ControlFlowSXClause* pCFsx = (R700ControlFlowSXClause*)pInst;
+                *pCurrPos++ = pCFsx->m_Word0.val;
+                *pCurrPos++ = (pCFsx->m_Word1.val | pCFsx->m_Word1_SWIZ.val);
+            }
+            break;
+        case SIT_CF_ALL_EXP_SMX:
+            {
+                R700ControlFlowSMXClause* pCFsmx = (R700ControlFlowSMXClause*)pInst;
+                *pCurrPos++ = pCFsmx->m_Word0.val;
+                *pCurrPos++ = (pCFsmx->m_Word1.val | pCFsmx->m_Word1_BUF.val);
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    /* ALU section.  Literal constants are copied with memcpy() so their
+     * bit patterns are transferred without reading them through a pointer
+     * of a different type. */
+    number_of_alu_dwords = 0;
+    for(pInst = pShader->lstALUInstructions.pHead; NULL != pInst; pInst = pInst->pNextInst)
+    {
+        switch (pInst->m_ShaderInstType)
+        {
+        case SIT_ALU:
+            {
+                R700ALUInstruction* pALU = (R700ALUInstruction*)pInst;
+
+                *pCurrPos++ = pALU->m_Word0.val;
+                *pCurrPos++ = (pALU->m_Word1.val | pALU->m_Word1_OP2.val | pALU->m_Word1_OP3.val);
+
+                number_of_alu_dwords += 2;
+            }
+            break;
+        case SIT_ALU_HALF_LIT:
+            {
+                R700ALUInstructionHalfLiteral* pALUhalf = (R700ALUInstructionHalfLiteral*)pInst;
+
+                *pCurrPos++ = pALUhalf->m_Word0.val;
+                *pCurrPos++ = (pALUhalf->m_Word1.val | pALUhalf->m_Word1_OP2.val | pALUhalf->m_Word1_OP3.val);
+                memcpy(pCurrPos++, &(pALUhalf->m_fLiteralX), sizeof(GLuint));
+                memcpy(pCurrPos++, &(pALUhalf->m_fLiteralY), sizeof(GLuint));
+
+                number_of_alu_dwords += 4;
+            }
+            break;
+        case SIT_ALU_FALL_LIT:
+            {
+                R700ALUInstructionFullLiteral* pALUfull = (R700ALUInstructionFullLiteral*)pInst;
+
+                *pCurrPos++ = pALUfull->m_Word0.val;
+                *pCurrPos++ = (pALUfull->m_Word1.val | pALUfull->m_Word1_OP2.val | pALUfull->m_Word1_OP3.val);
+
+                memcpy(pCurrPos++, &(pALUfull->m_fLiteralX), sizeof(GLuint));
+                memcpy(pCurrPos++, &(pALUfull->m_fLiteralY), sizeof(GLuint));
+                memcpy(pCurrPos++, &(pALUfull->m_fLiteralZ), sizeof(GLuint));
+                memcpy(pCurrPos++, &(pALUfull->m_fLiteralW), sizeof(GLuint));
+
+                number_of_alu_dwords += 6;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    /* Texture section: three words plus one filler word each. */
+    for(pInst = pShader->lstTEXInstructions.pHead; NULL != pInst; pInst = pInst->pNextInst)
+    {
+        R700TextureInstruction* pTEX = (R700TextureInstruction*)pInst;
+
+        *pCurrPos++ = pTEX->m_Word0.val;
+        *pCurrPos++ = pTEX->m_Word1.val;
+        *pCurrPos++ = pTEX->m_Word2.val;
+        *pCurrPos++ = 0x0beadeaf;
+    }
+
+    /* Vertex fetch section: three words plus one filler word each. */
+    for(pInst = pShader->lstVTXInstructions.pHead; NULL != pInst; pInst = pInst->pNextInst)
+    {
+        switch (pInst->m_ShaderInstType)
+        {
+        case SIT_VTX_SEM: //
+            {
+                R700VertexSemanticFetch* pVTXsem = (R700VertexSemanticFetch*)pInst;
+
+                *pCurrPos++ = pVTXsem->m_Word0.val;
+                *pCurrPos++ = (pVTXsem->m_Word1.val | pVTXsem->m_Word1_SEM.val);
+                *pCurrPos++ = pVTXsem->m_Word2.val;
+                *pCurrPos++ = 0x0beadeaf;
+            }
+            break;
+        case SIT_VTX_GENERIC: //
+            {
+                R700VertexGenericFetch* pVTXgeneric = (R700VertexGenericFetch*)pInst;
+
+                *pCurrPos++ = pVTXgeneric->m_Word0.val;
+                *pCurrPos++ = (pVTXgeneric->m_Word1.val | pVTXgeneric->m_Word1_GPR.val);
+                *pCurrPos++ = pVTXgeneric->m_Word2.val;
+                *pCurrPos++ = 0x0beadeaf;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    /* Replace any previously assembled program. */
+    if(NULL != pShader->pProgram)
+    {
+        FREE(pShader->pProgram);
+    }
+    pShader->pProgram = (GLubyte*)pShaderBinary;
+
+    end_of_cf_instructions = pShader->uCFOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
+
+    pShader->uEndOfCF = end_of_cf_instructions >> 1;
+
+    pShader->uEndOfALU = (end_of_cf_instructions + number_of_alu_dwords) >> 1;
+
+    pShader->uEndOfFetch = (pShader->uCFOffset + pShader->uShaderBinaryDWORDSize) >> 1;
+
+    pShader->bNeedsAssembly = GL_FALSE;
+}
+
+/* Placeholder: not implemented yet, does nothing with pShader. */
+void LoadProgram(R700_Shader *pShader) //context
+{
+}
+
+/* Placeholder: not implemented yet, does nothing with pShader. */
+void UpdateShaderRegisters(R700_Shader *pShader) //context
+{
+}
+
+/* Placeholder: not implemented yet; instructions are actually released by
+ * Clean_Up_Shader(). */
+void DeleteInstructions(R700_Shader *pShader)
+{
+}
+
+/* Placeholder: not implemented yet, prints nothing. */
+void DebugPrint(void)
+{
+}
+
+/* Free every node of an instruction list and leave the list empty. */
+static void FreeInstList(TypedShaderList *pList)
+{
+    R700ShaderInstruction *pInst = pList->pHead;
+    R700ShaderInstruction *pInstToFree;
+
+    while(NULL != pInst)
+    {
+        pInstToFree = pInst;
+        pInst = pInst->pNextInst;
+        FREE(pInstToFree);
+    }
+
+    pList->pHead = NULL;
+    pList->pTail = NULL;
+    pList->uNumOfNode = 0;
+}
+
+/*
+ * Release everything a shader owns: the assembled program buffer and all
+ * instructions on its CF, ALU, TEX and VTX lists.
+ *
+ * All freed pointers are reset, so no dangling pointers remain in the
+ * shader and calling this function a second time is harmless.
+ */
+void Clean_Up_Shader(R700_Shader *pShader)
+{
+    FREE(pShader->pProgram);
+    pShader->pProgram = NULL;
+
+    FreeInstList(&(pShader->lstCFInstructions));
+    FreeInstList(&(pShader->lstALUInstructions));
+    FreeInstList(&(pShader->lstTEXInstructions));
+    FreeInstList(&(pShader->lstVTXInstructions));
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h
new file mode 100644 (file)
index 0000000..bfd01e1
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef __R700_SHADER_H__
+#define __R700_SHADER_H__
+
+#include "main/mtypes.h"
+
+#include "r700_shaderinst.h"
+
+
+void r700ShaderInit(GLcontext * ctx);
+
+/* Kinds of shader handled by the R700 shader code; stored in R700_Shader.Type.
+ * NOTE(review): the FS/ES/GS/VS/PS abbreviations are not expanded in this
+ * header - presumably fetch, export, geometry, vertex and pixel shader;
+ * confirm against the hardware documentation. */
+typedef enum R700ShaderType
+{
+    R700_SHADER_FS      = 0x0,
+    R700_SHADER_ES      = 0x1,
+    R700_SHADER_GS      = 0x2,
+    R700_SHADER_VS      = 0x3,
+    R700_SHADER_PS      = 0x4,
+    R700_SHADER_INVALID = 0x5,
+} R700ShaderType;
+
+/* Header of a singly linked list of shader instructions.  Nodes are chained
+ * through R700ShaderInstruction.pNextInst starting at pHead; uNumOfNode is
+ * the node count (the assembler multiplies it by the per-instruction size). */
+typedef struct TypedShaderList 
+{
+       R700ShaderInstruction * pHead;
+       R700ShaderInstruction * pTail;
+       GLuint  uNumOfNode;
+} TypedShaderList;
+
+/* An address/value pair.
+ * NOTE(review): presumably a hardware register offset and the value to be
+ * programmed into it - no user of this type is visible here; confirm. */
+typedef struct RealRegister 
+{
+    GLuint uAddr;
+    GLuint uValue;
+} RealRegister;
+
+/* Dependency record with one destination slot and three source slots.
+ * NOTE(review): what the stored integers identify (instruction index,
+ * register number, ...) is not visible in this header; confirm with users. */
+typedef struct InstDeps
+{
+    GLint nDstDep;
+    GLint nSrcDeps[3];
+} InstDeps;
+
+/* One shader being built: its four typed instruction lists, the assembled
+ * binary, export/import flags and the layout offsets produced by assembly. */
+typedef struct R700_Shader 
+{
+       R700ShaderType   Type;
+
+    /* Assembled binary.  Owned by the shader: the assembler frees the previous
+     * buffer before storing a new one and Clean_Up_Shader() frees it. */
+    GLubyte*  pProgram;
+
+    GLboolean bBinaryShader;
+    GLboolean bFetchShaderRequired;
+    GLboolean bNeedsAssembly;   /* set to GL_FALSE at the end of assembly */
+    GLboolean bLinksDirty;
+
+    GLuint  uShaderBinaryDWORDSize; // in DWORDS
+    GLuint  nRegs;      
+    GLuint  nParamExports;   // VS_ EXPORT_COUNT (1 based, the actual register is 0 based!)
+    GLuint  nMemExports; 
+    GLuint  resource;     // VS and PS _RESOURCE
+    GLuint  exportMode;   // VS and PS _EXPORT_MODE
+
+    GLboolean  depthIsImported;             
+
+    // Vertex program exports
+    GLboolean  positionVectorIsExported;          
+
+    GLboolean  miscVectorIsExported;               
+    GLboolean  renderTargetArrayIndexIsExported;  
+
+    GLboolean  ccDist0VectorIsExported;  
+    GLboolean  ccDist1VectorIsExported;  
+
+    // Pixel program exports
+    GLboolean  depthIsExported;             
+    GLboolean  stencilRefIsExported;        
+    GLboolean  coverageToMaskIsExported;    
+    GLboolean  maskIsExported;              
+
+    GLboolean  killIsUsed;                  
+
+    GLuint  uStartAddr;
+    GLuint  uCFOffset;
+    /* The three uEndOf* values are written by the assembler as DWORD
+     * offsets shifted right by one. */
+    GLuint  uEndOfCF;
+    GLuint  uEndOfALU;
+    GLuint  uEndOfFetch;
+    GLuint  uStackSize;
+    GLuint  uMaxCallDepth;
+
+       TypedShaderList lstCFInstructions;
+       TypedShaderList lstALUInstructions;
+       TypedShaderList lstTEXInstructions;
+       TypedShaderList lstVTXInstructions;
+
+    RealRegister RegStartAddr;
+    RealRegister RegCFOffset;
+    RealRegister RegEndCF;
+    RealRegister RegEndALU;
+    /* NOTE(review): the name looks like a typo for "RegEndFetch"; it is left
+     * unchanged because other files may already reference it. */
+    RealRegister egEndFetcg;
+
+       // -------- constants
+       GLfloat   ConstantArray[SQ_ALU_CONSTANT_PS_COUNT * 4];
+       
+       GLboolean bSurfAllocated;
+} R700_Shader;
+
+//Internal
+void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst);
+void ResolveLinks(R700_Shader *pShader);
+void Assemble(R700_Shader *pShader);
+
+
+//Interface
+void Init_R700_Shader(R700_Shader * pShader);
+void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst);
+void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst);
+void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst);
+void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst);
+
+void LoadProgram(R700_Shader *pShader);
+void UpdateShaderRegisters(R700_Shader *pShader);
+void DeleteInstructions(R700_Shader *pShader);
+void DebugPrint(void);
+
+void Clean_Up_Shader(R700_Shader *pShader);
+
+#endif /*__R700_SHADER_H__*/
+
diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.c b/src/mesa/drivers/dri/r600/r700_shaderinst.c
new file mode 100644 (file)
index 0000000..c1bffee
--- /dev/null
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#include "main/mtypes.h"
+
+#include "r700_debug.h"
+#include "r700_shaderinst.h"
+
+/* Reset a generic control-flow clause: tag it SIT_CF_GENERIC, zero both
+ * encoding words and the use count, and drop the VTX and TEX links. */
+void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst)
+{
+    pInst->m_ShaderInstType = SIT_CF_GENERIC;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val = 0;
+    pInst->m_Word1.val = 0;
+
+    pInst->m_pLinkedVTXInstruction = NULL;
+    pInst->m_pLinkedTEXInstruction = NULL;
+}
+
+/* Reset an ALU control-flow clause: tag it SIT_CF_ALU, zero both encoding
+ * words and the use count, and drop the link to its ALU instruction. */
+void Init_R700ControlFlowALUClause(R700ControlFlowALUClause* pInst)
+{
+    pInst->m_ShaderInstType = SIT_CF_ALU;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val = 0;
+    pInst->m_Word1.val = 0;
+
+    pInst->m_pLinkedALUInstruction = NULL;
+}
+
+/* Reset an SX export clause: tag it SIT_CF_ALL_EXP_SX and zero the use
+ * count and all three encoding words (word0, word1, word1_swiz). */
+void Init_R700ControlFlowSXClause(R700ControlFlowSXClause* pInst)
+{
+    pInst->m_ShaderInstType = SIT_CF_ALL_EXP_SX;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val      = 0;
+    pInst->m_Word1.val      = 0;
+    pInst->m_Word1_SWIZ.val = 0;
+}
+
+/* Reset an SMX export clause: tag it SIT_CF_ALL_EXP_SMX and zero the use
+ * count and all three encoding words (word0, word1, word1_buf). */
+void Init_R700ControlFlowSMXClause(R700ControlFlowSMXClause* pInst)
+{
+    pInst->m_ShaderInstType = SIT_CF_ALL_EXP_SMX;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val     = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word1_BUF.val = 0;
+}
+
+/* Reset a plain ALU instruction: tag it SIT_ALU, zero the use count and the
+ * four encoding words, and drop the link to the owning ALU clause. */
+void Init_R700ALUInstruction(R700ALUInstruction* pInst)
+{
+    pInst->m_ShaderInstType = SIT_ALU;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val     = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word1_OP2.val = 0;
+    pInst->m_Word1_OP3.val = 0;
+
+    pInst->m_pLinkedALUClause = NULL;
+}
+
+/* Reset an ALU instruction that carries two literal floats: tag it
+ * SIT_ALU_HALF_LIT, zero the use count and encoding words, drop the clause
+ * link and store x and y as the literals. */
+void Init_R700ALUInstructionHalfLiteral(R700ALUInstructionHalfLiteral* pInst, GLfloat x, GLfloat y)
+{
+    pInst->m_ShaderInstType = SIT_ALU_HALF_LIT;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val     = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word1_OP2.val = 0;
+    pInst->m_Word1_OP3.val = 0;
+
+    pInst->m_pLinkedALUClause = NULL;
+
+    pInst->m_fLiteralX = x;
+    pInst->m_fLiteralY = y;
+}
+
+/* Reset an ALU instruction that carries four literal floats: tag it
+ * SIT_ALU_FALL_LIT, zero the use count and encoding words, drop the clause
+ * link and store x, y, z and w as the literals. */
+void Init_R700ALUInstructionFullLiteral(R700ALUInstructionFullLiteral* pInst, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
+{
+    pInst->m_ShaderInstType = SIT_ALU_FALL_LIT;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val     = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word1_OP2.val = 0;
+    pInst->m_Word1_OP3.val = 0;
+
+    pInst->m_pLinkedALUClause = NULL;
+
+    pInst->m_fLiteralX = x;
+    pInst->m_fLiteralY = y;
+    pInst->m_fLiteralZ = z;
+    pInst->m_fLiteralW = w;
+}
+
+/* Reset a texture instruction: tag it SIT_TEX, zero the use count and the
+ * three encoding words, and drop the link to the owning generic clause. */
+void Init_R700TextureInstruction(R700TextureInstruction* pInst)
+{
+    pInst->m_ShaderInstType = SIT_TEX;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val = 0;
+    pInst->m_Word1.val = 0;
+    pInst->m_Word2.val = 0;
+
+    pInst->m_pLinkedGenericClause = NULL;
+}
+
+/* Reset a semantic vertex fetch: tag it SIT_VTX_SEM, zero the use count and
+ * the encoding words (including word1_sem), and drop the clause link. */
+void Init_R700VertexSemanticFetch(R700VertexSemanticFetch* pInst)
+{
+    pInst->m_ShaderInstType = SIT_VTX_SEM;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val     = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word1_SEM.val = 0;
+    pInst->m_Word2.val     = 0;
+
+    pInst->m_pLinkedGenericClause = NULL;
+}
+
+/* Reset a generic vertex fetch: tag it SIT_VTX_GENERIC, zero the use count
+ * and the encoding words (including word1_gpr), and drop the clause link. */
+void Init_R700VertexGenericFetch(R700VertexGenericFetch* pInst)
+{
+    pInst->m_ShaderInstType = SIT_VTX_GENERIC;
+    pInst->useCount         = 0;
+
+    pInst->m_Word0.val     = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word1_GPR.val = 0;
+    pInst->m_Word2.val     = 0;
+
+    pInst->m_pLinkedGenericClause = NULL;
+}
+
+/* Return the encoded size of one instruction of the given type, in DWORDs:
+ * 6 for a full-literal ALU instruction, 4 for a half-literal ALU, texture or
+ * any vertex fetch instruction, and 2 for everything else. */
+unsigned int GetInstructionSize(ShaderInstType instType)
+{
+    if (SIT_ALU_FALL_LIT == instType)
+    {
+        return 6;
+    }
+
+    if (   SIT_ALU_HALF_LIT == instType
+        || SIT_TEX          == instType
+        || SIT_VTX          == instType
+        || SIT_VTX_GENERIC  == instType
+        || SIT_VTX_SEM      == instType)
+    {
+        return 4;
+    }
+
+    return 2;
+}
+
+/* Return the instruction limit associated with a control-flow clause type:
+ * 0x80 for an ALU clause, 0x8 for a generic (tex/vtx) clause, and 0x10 for
+ * the export clause types and for any other value. */
+unsigned int GetCFMaxInstructions(ShaderInstType instType)
+{
+    if (SIT_CF_ALU == instType)
+    {
+        return 0x80;
+    }
+
+    if (SIT_CF_GENERIC == instType)
+    {
+        return 0x8;  //For tex and vtx
+    }
+
+    /* SIT_CF_ALL_EXP, SIT_CF_ALL_EXP_SX, SIT_CF_ALL_EXP_SMX and the rest. */
+    return 0x10;
+}
+
+/* Link a vertex instruction and a generic CF clause to each other.
+ * If the clause already has a texture instruction linked, an error is
+ * reported through r700_error(), nothing is changed and GL_FALSE is
+ * returned; otherwise both links are set and GL_TRUE is returned. */
+GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric,
+                                R700VertexInstruction *pVTXInstruction)
+{
+    if (0 == pCFGeneric->m_pLinkedTEXInstruction)
+    {
+        pCFGeneric->m_pLinkedVTXInstruction     = pVTXInstruction;
+        pVTXInstruction->m_pLinkedGenericClause = pCFGeneric;
+
+        return GL_TRUE;
+    }
+
+    r700_error(ERROR_ASM_VTX_CLAUSE, "This instruction is already linked to a texture instruction");
+    return GL_FALSE;
+}
+
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h
new file mode 100644 (file)
index 0000000..2829cca
--- /dev/null
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _R700_SHADERINST_H_
+#define _R700_SHADERINST_H_
+
+#include "main/glheader.h"
+
+#include "defaultendian.h" 
+#include "sq_micro_reg.h"
+
+#define SQ_ALU_CONSTANT_PS_OFFSET      0x00000000
+#define SQ_ALU_CONSTANT_PS_COUNT       0x00000100
+#define SQ_ALU_CONSTANT_VS_OFFSET      0x00000100
+#define SQ_ALU_CONSTANT_VS_COUNT       0x00000100
+#define SQ_FETCH_RESOURCE_PS_OFFSET    0x00000000
+#define SQ_FETCH_RESOURCE_PS_COUNT     0x000000a0
+#define SQ_FETCH_RESOURCE_VS_OFFSET    0x000000a0
+#define SQ_FETCH_RESOURCE_VS_COUNT     0x000000b0
+
+#define SHADERINST_TYPEMASK_CF  0x10
+#define SHADERINST_TYPEMASK_ALU 0x20
+#define SHADERINST_TYPEMASK_TEX 0x40
+#define SHADERINST_TYPEMASK_VTX 0x80
+
+/* Type tag stored at the start of every shader instruction struct.
+ * The values are grouped by the SHADERINST_TYPEMASK_* bits defined above:
+ * CF types are 0x1x, ALU types 0x2x, TEX is 0x40 and VTX types are 0x8x.
+ * The SIZE/MAX_INSTRUCTIONS notes match what GetInstructionSize() and
+ * GetCFMaxInstructions() return for each type. */
+typedef enum ShaderInstType 
+{
+    SIT_CF = 0x10,            /*SIZE = 0x2*/
+        SIT_CF_ALL_EXP = 0x14,    /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+            SIT_CF_ALL_EXP_SX = 0x15, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+            SIT_CF_ALL_EXP_SMX= 0x16, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+        SIT_CF_GENERIC = 0x18,    /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x8;  //For tex and vtx*/
+        SIT_CF_ALU = 0x19,        /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x80;*/
+    SIT_ALU = 0x20,           /*SIZE = 0x2,*/
+        SIT_ALU_HALF_LIT = 0x21,  /*SIZE = 0x4,*/
+        SIT_ALU_FALL_LIT = 0x22,  /*SIZE = 0x6,*/
+    SIT_TEX = 0x40,           /*SIZE = 0x4,*/
+    SIT_VTX = 0x80,           /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/
+        SIT_VTX_GENERIC = 0x81,   /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/
+        SIT_VTX_SEM = 0x82       /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/
+} ShaderInstType;
+
+/* Common header of every shader instruction: the type tag, the link to the
+ * next node of the list the instruction belongs to, an index and a use count.
+ * All concrete instruction structs below start with these same four fields
+ * in the same order. */
+typedef struct R700ShaderInstruction 
+{
+    ShaderInstType m_ShaderInstType;
+    struct R700ShaderInstruction *pNextInst;
+    GLuint m_uIndex;
+    GLuint useCount;
+} R700ShaderInstruction;
+
+// ------------------ CF insts ---------------------------
+
+typedef R700ShaderInstruction R700ControlFlowInstruction;
+
+/* Alloc/export control-flow clause: the common instruction header followed
+ * by the two alloc/export encoding words.  The SX and SMX clause structs
+ * repeat this layout and append one further word. */
+typedef struct R700ControlFlowAllocExportClause  
+{
+    ShaderInstType          m_ShaderInstType;
+    R700ShaderInstruction * pNextInst;    
+    GLuint m_uIndex;
+    GLuint useCount;
+               
+    sq_cf_alloc_export_word0_u      m_Word0;
+    sq_cf_alloc_export_word1_u      m_Word1;
+} R700ControlFlowAllocExportClause;
+
+/* SX export clause (type tag SIT_CF_ALL_EXP_SX): the alloc/export clause
+ * layout plus the swizzle variant of word 1.  The nested comments record
+ * which "base" struct each group of fields is copied from. */
+typedef struct R700ControlFlowSXClause 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700ControlFlowAllocExportClause
+               //R700ControlFlowInstruction 
+                       //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+                       //---------------------
+               //---------------------------
+    sq_cf_alloc_export_word0_u      m_Word0;
+    sq_cf_alloc_export_word1_u      m_Word1;
+       //-------------------------------------
+
+    sq_cf_alloc_export_word1_swiz_u m_Word1_SWIZ;
+} R700ControlFlowSXClause;
+
+/* SMX export clause (type tag SIT_CF_ALL_EXP_SMX): the alloc/export clause
+ * layout plus the buffer variant of word 1. */
+typedef struct R700ControlFlowSMXClause 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+    //R700ControlFlowAllocExportClause
+               //R700ControlFlowInstruction 
+                       //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+                       //---------------------
+               //---------------------------
+    sq_cf_alloc_export_word0_u      m_Word0;
+    sq_cf_alloc_export_word1_u      m_Word1;
+       //-------------------------------
+
+    sq_cf_alloc_export_word1_buf_u m_Word1_BUF;
+} R700ControlFlowSMXClause;
+
+/* Generic control-flow clause (type tag SIT_CF_GENERIC): common header, two
+ * CF encoding words, and links to a vertex or a texture instruction.
+ * LinkVertexInstruction() refuses to set the vertex link while the texture
+ * link is non-zero. */
+typedef struct R700ControlFlowGenericClause 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700ControlFlowInstruction
+               //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+               //---------------------
+       //---------------------
+
+    sq_cf_word0_u m_Word0;
+    sq_cf_word1_u m_Word1;
+
+    struct R700VertexInstruction  *m_pLinkedVTXInstruction;
+    struct R700TextureInstruction *m_pLinkedTEXInstruction;
+} R700ControlFlowGenericClause;
+
+/* ALU control-flow clause (type tag SIT_CF_ALU): common header, two CF ALU
+ * encoding words, and a link to an ALU instruction. */
+typedef struct R700ControlFlowALUClause 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+    //R700ControlFlowInstruction
+               //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+               //---------------------
+       //---------------------
+
+    sq_cf_alu_word0_u m_Word0;
+    sq_cf_alu_word1_u m_Word1;
+    
+    struct R700ALUInstruction *m_pLinkedALUInstruction;
+} R700ControlFlowALUClause;
+
+// ------------------- End of CF Inst ------------------------
+
+// ------------------- ALU Inst ------------------------------
+/* Plain ALU instruction (type tag SIT_ALU): common header, word 0, word 1
+ * with its OP2 and OP3 variants stored as separate members, and a link back
+ * to the ALU clause. */
+typedef struct R700ALUInstruction 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+       //---------------------
+
+    sq_alu_word0_u     m_Word0;
+    sq_alu_word1_u     m_Word1;
+    sq_alu_word1_op2_v2_u m_Word1_OP2;
+    sq_alu_word1_op3_u m_Word1_OP3;
+
+    struct R700ControlFlowALUClause *m_pLinkedALUClause;
+} R700ALUInstruction;
+
+/* ALU instruction with two literal floats (type tag SIT_ALU_HALF_LIT):
+ * the R700ALUInstruction layout followed by the X and Y literals. */
+typedef struct R700ALUInstructionHalfLiteral
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700ALUInstruction 
+               //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+               //---------------------
+
+    sq_alu_word0_u     m_Word0;
+    sq_alu_word1_u     m_Word1;
+    sq_alu_word1_op2_v2_u m_Word1_OP2;
+    sq_alu_word1_op3_u m_Word1_OP3;
+
+    struct R700ControlFlowALUClause *m_pLinkedALUClause;
+       //-------------------
+
+    GLfloat m_fLiteralX,
+            m_fLiteralY;
+} R700ALUInstructionHalfLiteral;
+
+/* ALU instruction with four literal floats (type tag SIT_ALU_FALL_LIT):
+ * the R700ALUInstruction layout followed by the X, Y, Z and W literals. */
+typedef struct R700ALUInstructionFullLiteral 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700ALUInstruction 
+               //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+               //---------------------
+
+    sq_alu_word0_u     m_Word0;
+    sq_alu_word1_u     m_Word1;
+    sq_alu_word1_op2_v2_u m_Word1_OP2;
+    sq_alu_word1_op3_u m_Word1_OP3;
+
+    struct R700ControlFlowALUClause *m_pLinkedALUClause;
+       //-------------------
+
+    GLfloat m_fLiteralX,
+            m_fLiteralY,
+            m_fLiteralZ,
+            m_fLiteralW;
+} R700ALUInstructionFullLiteral;
+// ------------------- End of ALU Inst -----------------------
+
+// ------------------- Texture/Vertex Instruction --------------------
+
+/* Texture instruction (type tag SIT_TEX): common header, three texture
+ * encoding words, and a link back to the generic CF clause. */
+typedef struct R700TextureInstruction 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+       //---------------------
+       
+    sq_tex_word0_u m_Word0;
+    sq_tex_word1_u m_Word1;
+    sq_tex_word2_u m_Word2;
+
+    struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+} R700TextureInstruction;
+
+/* Vertex fetch instruction: common header, three vertex encoding words, and
+ * a link back to the generic CF clause (set by LinkVertexInstruction()).
+ * The semantic and generic fetch structs repeat this layout and append one
+ * further word. */
+typedef struct R700VertexInstruction 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+       //---------------------
+       
+    sq_vtx_word0_u     m_Word0;
+    sq_vtx_word1_u     m_Word1;
+    sq_vtx_word2_u     m_Word2;
+
+    struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+} R700VertexInstruction;
+//
+/* Semantic vertex fetch (type tag SIT_VTX_SEM): the R700VertexInstruction
+ * layout followed by the semantic variant of word 1. */
+typedef struct R700VertexSemanticFetch 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700VertexInstruction
+               //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+               //---------------------
+       
+    sq_vtx_word0_u     m_Word0;
+    sq_vtx_word1_u     m_Word1;
+    sq_vtx_word2_u     m_Word2;
+
+    struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+       //---------------------------
+
+    sq_vtx_word1_sem_u m_Word1_SEM;
+} R700VertexSemanticFetch;
+//
+/* Generic vertex fetch (type tag SIT_VTX_GENERIC): the R700VertexInstruction
+ * layout followed by the GPR variant of word 1. */
+typedef struct R700VertexGenericFetch 
+{
+       ShaderInstType          m_ShaderInstType;
+       R700ShaderInstruction * pNextInst;
+       //R700VertexInstruction
+               //R700ShaderInstruction
+       GLuint m_uIndex;
+    GLuint useCount;
+               //---------------------
+       
+    sq_vtx_word0_u     m_Word0;
+    sq_vtx_word1_u     m_Word1;
+    sq_vtx_word2_u     m_Word2;
+
+    struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+       //---------------------------
+
+    sq_vtx_word1_gpr_u m_Word1_GPR;
+} R700VertexGenericFetch;
+
+// ------------------- End of Texture Vertex Instruction --------------------
+
+void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst);
+void Init_R700ControlFlowALUClause(R700ControlFlowALUClause* pInst);
+void Init_R700ControlFlowSXClause(R700ControlFlowSXClause* pInst);
+void Init_R700ControlFlowSMXClause(R700ControlFlowSMXClause* pInst);
+void Init_R700ALUInstruction(R700ALUInstruction* pInst);
+void Init_R700ALUInstructionHalfLiteral(R700ALUInstructionHalfLiteral* pInst, GLfloat x, GLfloat y);
+void Init_R700ALUInstructionFullLiteral(R700ALUInstructionFullLiteral* pInst, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+void Init_R700TextureInstruction(R700TextureInstruction* pInst);
+void Init_R700VertexSemanticFetch(R700VertexSemanticFetch* pInst);
+void Init_R700VertexGenericFetch(R700VertexGenericFetch* pInst);
+
+unsigned int GetInstructionSize(ShaderInstType instType);
+unsigned int GetCFMaxInstructions(ShaderInstType instType);
+
+GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric,
+                                                               R700VertexInstruction *pVTXInstruction);
+
+#endif //_R700_SHADERINST_H_
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
new file mode 100644 (file)
index 0000000..4341f33
--- /dev/null
@@ -0,0 +1,925 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vp_build.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "main/api_arrayelt.h"
+#include "main/state.h"
+#include "main/framebuffer.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "vbo/vbo.h"
+#include "main/texformat.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+#include "r700_state.h"
+
+#if 0 /* to be enabled */
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+#endif /* to be enabled */
+
+/* Placeholder: the body is empty, so no default state is set up yet. */
+void r700SetDefaultStates(context_t *context) //--------------------
+{
+    
+}
+
+/* Refresh the vertex shader selection and the program state parameters
+ * after a GL state change.
+ *
+ * The whole body is still compiled out ("to be enabled"), so the function
+ * currently does nothing.  Its locals are declared inside the disabled
+ * region so that the live build does not carry unused variables (two of
+ * them are arrays of _TNL_ATTRIB_MAX elements on the stack). */
+void r700UpdateShaders (GLcontext * ctx)  //----------------------------------
+{
+#if 0 /* to be enabled */
+    context_t *context = R700_CONTEXT(ctx);
+
+    GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+    GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+    struct r700_vertex_program *vp;
+    int i;
+
+    if (context->NewGLState) 
+    {
+        context->NewGLState = 0;
+
+        /* Temporarily point the material attributes at zero-stride dummies
+         * while the fixed-function program is rebuilt ... */
+        for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) 
+        {
+            /* mat states from state var not array for sw */
+            dummy_attrib[i].stride = 0;
+
+            temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i];
+            TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]);
+        }
+
+        _tnl_UpdateFixedFunctionProgram(ctx);
+
+        /* ... then restore the saved attribute pointers. */
+        for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) 
+        {
+            TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i];
+        }
+
+        r700SelectVertexShader(ctx);
+        vp = (struct r700_vertex_program *)ctx->VertexProgram._Current;
+
+        if (vp->translated == GL_FALSE) 
+        {
+            // TODO
+            //fprintf(stderr, "Falling back to sw-tcl\n");
+            //hw_tcl_on = future_hw_tcl_on = 0;
+            //r300ResetHwState(rmesa);
+            //
+            r700UpdateStateParameters(ctx, _NEW_PROGRAM);
+            return;
+        }
+    }
+
+    r700UpdateStateParameters(ctx, _NEW_PROGRAM);
+#endif /* to be enabled */
+}
+
+/*
+ * To correctly position primitives:
+ * (placeholder - the body is empty, so nothing is updated yet)
+ */
+void r700UpdateViewportOffset(GLcontext * ctx) //------------------
+{
+}
+
+/**
+ * Tell the card where to render (offset, pitch).
+ * Affected by glDrawBuffer, etc.
+ *
+ * The body is still compiled out ("to be enabled").  When enabled it picks
+ * the screen's front or back buffer as the render target according to the
+ * first colour draw buffer index, and zeroes the target for any other index.
+ */
+void r700UpdateDrawBuffer(GLcontext * ctx) /* TODO */ //---------------------
+{
+#if 0 /* to be enabled */
+    context_t *context = R700_CONTEXT(ctx);
+
+    switch (ctx->DrawBuffer->_ColorDrawBufferIndexes[0]) 
+    {
+    case BUFFER_FRONT_LEFT:
+        context->target.rt = context->screen->frontBuffer;
+        break;
+    case BUFFER_BACK_LEFT:
+        context->target.rt = context->screen->backBuffer;
+        break;
+    default:
+        /* memset() takes (dest, fill value, byte count); with the last two
+         * swapped the call would clear zero bytes. */
+        memset (&context->target.rt, 0, sizeof(context->target.rt));
+        break;
+    }
+#endif /* to be enabled */
+}
+
+/* Placeholder for fetching one state-variable parameter into 'value'.
+ * Only the context is looked up so far; 'state' is not read and 'value' is
+ * not written. */
+static void r700FetchStateParameter(GLcontext * ctx,
+                                               const gl_state_index state[STATE_LENGTH],
+                                               GLfloat * value)
+{
+       context_t *context = R700_CONTEXT(ctx);
+
+    /* TODO */
+}
+
+/* Refresh the state-variable parameters of the current fragment program.
+ * The body is still compiled out ("to be enabled"), so the function
+ * currently does nothing.  When enabled it returns early unless new_state
+ * contains _NEW_BUFFERS or _NEW_PROGRAM, and otherwise calls
+ * r700FetchStateParameter() for every PROGRAM_STATE_VAR parameter. */
+void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) //--------------------
+{
+#if 0 /* to be enabled */
+       struct r700_fragment_program *fp;
+       struct gl_program_parameter_list *paramList;
+       GLuint i;
+
+       if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM)))
+               return;
+
+       fp = (struct r700_fragment_program *)ctx->FragmentProgram._Current;
+       if (!fp)
+    {
+               return;
+    }
+
+       paramList = fp->mesa_program.Base.Parameters;
+
+       if (!paramList)
+    {
+               return;
+    }
+
+       for (i = 0; i < paramList->NumParameters; i++) 
+    {
+               if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) 
+        {
+                       r700FetchStateParameter(ctx,
+                                               paramList->Parameters[i].
+                                               StateIndexes,
+                                               paramList->ParameterValues[i]);
+               }
+       }
+#endif /* to be enabled */
+}
+
+/**
+ * Called by Mesa after an internal state update.
+ *
+ * The body is still compiled out ("to be enabled"), so the function
+ * currently does nothing.  When enabled it forwards new_state to the
+ * swrast, swsetup, vbo, tnl and array-element modules, updates the draw
+ * buffer and state parameters, programs the VTE/SPI bits according to
+ * context->bEnablePerspective, and accumulates new_state in NewGLState.
+ */
+static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //-------------------
+{
+#if 0 /* to be enabled */
+    context_t *context = R700_CONTEXT(ctx);
+
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+
+    _swrast_InvalidateState(ctx, new_state);
+       _swsetup_InvalidateState(ctx, new_state);
+       _vbo_InvalidateState(ctx, new_state);
+       _tnl_InvalidateState(ctx, new_state);
+       _ae_invalidate_state(ctx, new_state);
+
+       if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) 
+    {
+               r700UpdateDrawBuffer(ctx);
+       }
+
+       r700UpdateStateParameters(ctx, new_state);
+
+    if(GL_TRUE == context->bEnablePerspective)
+    {
+        /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */
+        CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+        CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+
+        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+        CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+        SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+    }
+    else
+    {
+        /* For orthogonal case. */
+        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+
+        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+        SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+        CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+    }
+
+       context->NewGLState |= new_state;
+#endif /* to be enabled */
+}
+
+/* Copy the GL depth-test state into the DB_DEPTH_CONTROL shadow register.
+ * With the depth test disabled both Z_ENABLE and Z_WRITE_ENABLE are cleared.
+ * Otherwise Z_ENABLE is set, Z_WRITE_ENABLE follows ctx->Depth.Mask, and the
+ * ZFUNC field is set from ctx->Depth.Func (unknown values map to
+ * FRAG_ALWAYS). */
+static void r700SetDepthState(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+    GLuint uZFunc;
+
+    if (!ctx->Depth.Test)
+    {
+        CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+        CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+        return;
+    }
+
+    SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+
+    if (ctx->Depth.Mask)
+    {
+        SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+    }
+    else
+    {
+        CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+    }
+
+    /* Translate the GL comparison into the hardware encoding first, then
+     * write the field once. */
+    switch (ctx->Depth.Func)
+    {
+    case GL_NEVER:    uZFunc = FRAG_NEVER;    break;
+    case GL_LESS:     uZFunc = FRAG_LESS;     break;
+    case GL_EQUAL:    uZFunc = FRAG_EQUAL;    break;
+    case GL_LEQUAL:   uZFunc = FRAG_LEQUAL;   break;
+    case GL_GREATER:  uZFunc = FRAG_GREATER;  break;
+    case GL_NOTEQUAL: uZFunc = FRAG_NOTEQUAL; break;
+    case GL_GEQUAL:   uZFunc = FRAG_GEQUAL;   break;
+    case GL_ALWAYS:
+    default:          uZFunc = FRAG_ALWAYS;   break;
+    }
+
+    SETfield(r700->DB_DEPTH_CONTROL.u32All, uZFunc,
+             ZFUNC_shift, ZFUNC_mask);
+}
+
+static void r700AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) /* AlphaFunc driver hook (installed by r700InitStateFuncs); empty stub, currently a no-op. */
+{
+}
+
+
+static void r700BlendColor(GLcontext * ctx, const GLfloat cf[4]) /* BlendColor driver hook (installed by r700InitStateFuncs); empty stub, currently a no-op. */
+{
+}
+
+static void r700BlendEquationSeparate(GLcontext * ctx,
+                                                     GLenum modeRGB, GLenum modeA) /* BlendEquationSeparate driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700BlendFuncSeparate(GLcontext * ctx,
+                                 GLenum sfactorRGB, GLenum dfactorRGB,
+                                 GLenum sfactorA, GLenum dfactorA) /* BlendFuncSeparate driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700UpdateCulling(GLcontext * ctx) /* Recompute the FACE, CULL_FRONT and CULL_BACK bits of PA_SU_SC_MODE_CNTL from ctx->Polygon. */
+{
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(R700_CONTEXT(ctx)->chipobj.pvChipObj);
+
+    CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* reset all three bits first, so both cull bits stay clear when CullFlag is false */
+    CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+    CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+
+    if (ctx->Polygon.CullFlag) 
+    {
+        switch (ctx->Polygon.CullFaceMode) /* each case sets both cull bits explicitly, although they were already cleared above */
+        {
+        case GL_FRONT:            
+            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+            break;
+        case GL_BACK:
+            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+            break;
+        case GL_FRONT_AND_BACK:
+            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+            break;
+        default: /* unknown cull mode: cull nothing */
+            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+            break;
+        }
+    }
+
+    switch (ctx->Polygon.FrontFace) /* winding order: FACE_bit is set for GL_CW and cleared for GL_CCW / anything else */
+    {
+        case GL_CW:
+            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+            break;
+        case GL_CCW:
+            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); 
+            break;
+        default:
+            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
+            break;
+    }
+}
+
+static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) /* Enable driver hook: only GL_DEPTH_TEST and GL_CULL_FACE are acted on; every other capability is ignored for now. */
+{
+    switch (cap) /* 'state' is never read here; the helpers called below take only ctx */
+    {
+        case GL_TEXTURE_1D:
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_3D:            
+            break;
+        case GL_FOG:           
+            break;
+        case GL_ALPHA_TEST:            
+            break;
+        case GL_COLOR_LOGIC_OP:                
+        case GL_BLEND:         
+            break;
+        case GL_CLIP_PLANE0:
+        case GL_CLIP_PLANE1:
+        case GL_CLIP_PLANE2:
+        case GL_CLIP_PLANE3:
+        case GL_CLIP_PLANE4:
+        case GL_CLIP_PLANE5:           
+            break;
+        case GL_DEPTH_TEST:
+            r700SetDepthState(ctx); /* rebuilds DB_DEPTH_CONTROL from ctx->Depth */
+            break;
+        case GL_STENCIL_TEST:          
+            break;
+        case GL_CULL_FACE:
+           r700UpdateCulling(ctx); /* rebuilds the cull/face bits from ctx->Polygon */
+           break;
+        case GL_POLYGON_OFFSET_POINT:
+        case GL_POLYGON_OFFSET_LINE:
+        case GL_POLYGON_OFFSET_FILL:           
+            break;
+        default:               
+            break;
+    }
+}
+
+/**
+ * Handle glColorMask(). Not implemented yet: the body is empty.
+ */
+static void r700ColorMask(GLcontext * ctx,
+                         GLboolean r, GLboolean g, GLboolean b, GLboolean a) /* empty stub, currently a no-op */
+{
+}
+
+/**
+ * Change the depth testing function. The func argument is not read here;
+ * r700SetDepthState() is called with ctx only to rebuild the depth state.
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700DepthFunc(GLcontext * ctx, GLenum func) /* DepthFunc driver hook */
+{
+    r700SetDepthState(ctx);
+}
+
+/**
+ * Enable/Disable depth writing. The mask argument is not read here;
+ * r700SetDepthState() is called with ctx only to rebuild the depth state.
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700DepthMask(GLcontext * ctx, GLboolean mask) /* DepthMask driver hook */
+{
+    r700SetDepthState(ctx);
+}
+
+/**
+ * Change the culling mode. The mode argument is not read here;
+ * r700UpdateCulling() re-reads ctx->Polygon to rebuild the cull bits.
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700CullFace(GLcontext * ctx, GLenum mode) /* CullFace driver hook */
+{
+    r700UpdateCulling(ctx);
+}
+
+/* =============================================================
+ * Fog (not implemented yet)
+ */
+static void r700Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) /* Fogfv driver hook; empty stub, currently a no-op. */
+{
+}
+
+/**
+ * Change the polygon orientation. The mode argument is not read here;
+ * r700UpdateCulling() re-reads ctx->Polygon.FrontFace to set FACE_bit.
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700FrontFace(GLcontext * ctx, GLenum mode) /* FrontFace driver hook */
+{
+    r700UpdateCulling(ctx);
+}
+
+static void r700ShadeModel(GLcontext * ctx, GLenum mode) /* ShadeModel driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) /* PointParameterfv driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face,
+                                   GLenum func, GLint ref, GLuint mask) /* StencilFuncSeparate driver hook; empty stub, currently a no-op. */
+{
+}
+
+
+static void r700StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) /* StencilMaskSeparate driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700StencilOpSeparate(GLcontext * ctx, GLenum face,
+                                 GLenum fail, GLenum zfail, GLenum zpass) /* StencilOpSeparate driver hook; empty stub, currently a no-op. */
+{
+}
+
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
+
+static void r700Viewport(GLcontext * ctx, 
+                         GLint x, 
+                         GLint y,
+                                    GLsizei width, 
+                         GLsizei height) /* Viewport driver hook; the whole body is compiled out (#if 0), so this is currently a no-op. */
+{
+#if 0 /* to be enabled */
+    context_t *context = R700_CONTEXT(ctx);
+
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+
+    context->vport_x = x; /* remember the requested viewport rectangle in the driver context */
+    context->vport_y = y;
+    context->vport_width = width;
+    context->vport_height= height;
+
+    __DRIdrawablePrivate *dPriv = context->currentDraw;
+
+    GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; /* drawable origin; both offsets fall back to 0 without a drawable */
+    GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; /* uses y + h; pairs with the negated sy/ty below */
+
+    const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+    GLfloat sx, tx, sy, ty, sz, tz;
+    GLfloat scale;
+
+    switch (ctx->Visual.depthBits) /* scale = 1 / (2^depthBits - 1), applied to the Z scale and offset below */
+    {
+    case 16:
+        scale = 1.0 / (GLfloat) 0xffff;        
+        break;
+    case 24:
+        scale = 1.0 / (GLfloat) 0xffffff;        
+        break;
+    default:
+        fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
+            ctx->Visual.depthBits);
+        _mesa_exit(-1); /* NOTE(review): presumably does not return; otherwise 'scale' is read uninitialized below - confirm */
+    }
+
+    sx = v[MAT_SX];
+       tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
+       sy = -v[MAT_SY];
+       ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+       sz = v[MAT_SZ] * scale;
+       tz = v[MAT_TZ] * scale;
+
+    /* TODO : Need DMA flush as well. */
+
+    if(context->cmdbuf.count_used > 0) /* flush any queued commands before the viewport registers are changed */
+    {
+           (context->chipobj.FlushCmdBuffer)(context);
+    }
+
+    r700->PA_CL_VPORT_XSCALE.u32All  = *((unsigned int*)(&sx)); /* NOTE(review): float bits are read through an unsigned int pointer here and below; that breaks C strict-aliasing rules - use memcpy or a union when enabling this code */
+    r700->PA_CL_VPORT_XOFFSET.u32All = *((unsigned int*)(&tx));
+
+    r700->PA_CL_VPORT_YSCALE.u32All  = *((unsigned int*)(&sy));
+    r700->PA_CL_VPORT_YOFFSET.u32All = *((unsigned int*)(&ty));
+
+    r700->PA_CL_VPORT_ZSCALE.u32All  = *((unsigned int*)(&sz));
+    r700->PA_CL_VPORT_ZOFFSET.u32All = *((unsigned int*)(&tz));
+#endif /* to be enabled */
+}
+
+
+static void r700DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) /* DepthRange driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700PointSize(GLcontext * ctx, GLfloat size) /* PointSize driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700LineWidth(GLcontext * ctx, GLfloat widthf) /* LineWidth driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) /* PolygonOffset driver hook; empty stub, currently a no-op. */
+{
+}
+
+
+static void r700PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) /* PolygonMode driver hook; empty stub, currently a no-op. */
+{
+}
+static void r700RenderMode(GLcontext * ctx, GLenum mode) /* RenderMode driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) /* ClipPlane driver hook; empty stub, currently a no-op. */
+{
+}
+
+static void r700Scissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) /* Scissor driver hook; the enabled branch contains only commented-out code, so this is currently a no-op. */
+{
+    if (ctx->Scissor.Enabled) 
+    {
+               /* We don't pipeline cliprect changes */
+               /* r700Flush(ctx); */
+
+        //__DRIdrawablePrivate *dPriv = radeon->dri.drawable;
+               //int x1 = dPriv->x + ctx->Scissor.X;
+               //int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height);
+
+               //radeon->state.scissor.rect.x1 = x1;
+               //radeon->state.scissor.rect.y1 = y1;
+               //radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width;
+               //radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height;
+               /* radeonRecalcScissorRects(radeon); */
+       }
+}
+
+
+/**
+ * Calculate initial hardware state and register state functions.
+ * Assumes that the command buffer and state atoms have been
+ * initialized already.
+ */
+void r700InitState(GLcontext * ctx) /* the whole body is compiled out (#if 0), so this is currently a no-op */
+{
+#if 0 /* to be enabled */
+    context_t *context = R700_CONTEXT(ctx);
+
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+    if(context->ctx->Visual.doubleBufferMode && context->sarea->pfCurrentPage == 0) 
+    {
+               context->target.rt = context->screen->backBuffer;
+       } 
+    else 
+    {
+               context->target.rt = context->screen->frontBuffer;
+       }
+
+    SETfield(r700->CB_TARGET_MASK.u32All, 0xF, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
+    SETfield(r700->CB_SHADER_MASK.u32All, 0xF, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
+
+    /* screen scissor: from (0,0) to the screen width/height */
+    r700->PA_SC_SCREEN_SCISSOR_TL.u32All = 0x0;
+    SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, context->screen->width,  
+             PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, PA_SC_SCREEN_SCISSOR_BR__BR_X_mask);
+    SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, context->screen->height, 
+             PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask);
+
+    /* window scissor: the current drawable rectangle (x, y) .. (x + w, y + h) */
+    SETbit(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+    SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, context->currentDraw->x, 
+             PA_SC_WINDOW_SCISSOR_TL__TL_X_shift, PA_SC_WINDOW_SCISSOR_TL__TL_X_mask);
+    SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, context->currentDraw->y, 
+             PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift, PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask);
+
+       SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, context->currentDraw->x + context->currentDraw->w, 
+             PA_SC_WINDOW_SCISSOR_BR__BR_X_shift, PA_SC_WINDOW_SCISSOR_BR__BR_X_mask);
+    SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, context->currentDraw->y + context->currentDraw->h, 
+             PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift, PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask);
+
+    /* 4 clip rectangles */ /* TODO : set these clip rects according to context->currentDraw->numClipRects */
+       r700->PA_SC_CLIPRECT_RULE.u32All = 0x0000FFFF;
+
+    SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, context->currentDraw->x, 
+             PA_SC_CLIPRECT_0_TL__TL_X_shift, PA_SC_CLIPRECT_0_TL__TL_X_mask);
+    SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, context->currentDraw->y, 
+             PA_SC_CLIPRECT_0_TL__TL_Y_shift, PA_SC_CLIPRECT_0_TL__TL_Y_mask);
+       SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, context->currentDraw->x + context->currentDraw->w, 
+             PA_SC_CLIPRECT_0_BR__BR_X_shift, PA_SC_CLIPRECT_0_BR__BR_X_mask);
+    SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, context->currentDraw->y + context->currentDraw->h, 
+             PA_SC_CLIPRECT_0_BR__BR_Y_shift, PA_SC_CLIPRECT_0_BR__BR_Y_mask);
+
+    r700->PA_SC_CLIPRECT_1_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All; /* clip rects 1..3 are copies of clip rect 0 */
+       r700->PA_SC_CLIPRECT_1_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+    r700->PA_SC_CLIPRECT_2_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
+       r700->PA_SC_CLIPRECT_2_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+    r700->PA_SC_CLIPRECT_3_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
+       r700->PA_SC_CLIPRECT_3_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+
+    /* more....2d clip */
+    SETbit(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+    SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, context->currentDraw->x, 
+             PA_SC_GENERIC_SCISSOR_TL__TL_X_shift, PA_SC_GENERIC_SCISSOR_TL__TL_X_mask);
+    SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, context->currentDraw->y, 
+             PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift, PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask);
+    SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, context->currentDraw->x + context->currentDraw->w, 
+             PA_SC_GENERIC_SCISSOR_BR__BR_X_shift, PA_SC_GENERIC_SCISSOR_BR__BR_X_mask);
+    SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, context->currentDraw->y + context->currentDraw->h, 
+             PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift, PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask);
+
+    SETbit(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+    SETfield(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, context->currentDraw->x, 
+             PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask);
+    SETfield(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, context->currentDraw->y, 
+             PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask);
+    SETfield(r700->PA_SC_VPORT_SCISSOR_0_BR.u32All, context->currentDraw->x + context->currentDraw->w, 
+             PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask);
+    SETfield(r700->PA_SC_VPORT_SCISSOR_0_BR.u32All, context->currentDraw->y + context->currentDraw->h, 
+             PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
+    
+    SETbit(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, WINDOW_OFFSET_DISABLE_bit); /* NOTE(review): the SCISSOR_1 SETfield calls below reuse the SCISSOR_0 shift/mask macros - presumably the same field layout; confirm */
+    SETfield(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, context->currentDraw->x, 
+             PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask);
+    SETfield(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, context->currentDraw->y, 
+             PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask);
+    SETfield(r700->PA_SC_VPORT_SCISSOR_1_BR.u32All, context->currentDraw->x + context->currentDraw->w, 
+             PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask);
+    SETfield(r700->PA_SC_VPORT_SCISSOR_1_BR.u32All, context->currentDraw->y + context->currentDraw->h, 
+             PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
+
+    /* setup viewport: origin (0,0), sized to the current drawable */
+    r700Viewport(ctx, 
+                 0,
+                 0,
+                            context->currentDraw->w,
+                 context->currentDraw->h);
+    
+    /* Turn off vgt reuse */
+    r700->VGT_REUSE_OFF.u32All = 0;
+    SETbit(r700->VGT_REUSE_OFF.u32All, REUSE_OFF_bit);
+
+    /* Specify offsetting and clamp values for vertices */
+    r700->VGT_MAX_VTX_INDX.u32All      = 0xFFFFFF;
+    r700->VGT_MIN_VTX_INDX.u32All      = 0;
+    r700->VGT_INDX_OFFSET.u32All    = 0;
+
+    /* Specify the number of instances */
+    r700->VGT_DMA_NUM_INSTANCES.u32All = 1;
+
+    /* alpha test disabled: function field cleared, enable bit cleared */
+    CLEARfield(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_FUNC_mask); 
+    CLEARbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+
+    /* default shader connections. */
+    r700->SPI_VS_OUT_ID_0.u32All  = 0x03020100;
+    r700->SPI_VS_OUT_ID_1.u32All  = 0x07060504;
+
+    r700->SPI_PS_INPUT_CNTL_0.u32All  = 0x00000800;
+    r700->SPI_PS_INPUT_CNTL_1.u32All  = 0x00000801;
+    r700->SPI_PS_INPUT_CNTL_2.u32All  = 0x00000802;
+
+    SETfield(r700->CB_COLOR_CONTROL.u32All, 0xCC, ROP3_shift, ROP3_mask);
+    CLEARbit(r700->CB_COLOR_CONTROL.u32All, PER_MRT_BLEND_bit);
+    CLEARfield(r700->CB_BLEND0_CONTROL.u32All, COLOR_SRCBLEND_mask); /* no dst blend -- NOTE(review): the field cleared is SRCBLEND, not a dst field; confirm intent */
+    CLEARfield(r700->CB_BLEND0_CONTROL.u32All, ALPHA_SRCBLEND_mask); /* no dst blend -- NOTE(review): same mismatch as the line above */
+  
+    r700->DB_SHADER_CONTROL.u32All = 0;
+    SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
+
+    /* Set up the culling control register */ 
+    SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES, 
+             POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask); 
+    SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES, 
+             POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask); 
+
+    /* Do scale XY and Z by 1/W0. */
+    context->bEnablePerspective = GL_TRUE;
+    CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+    CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+    /* Enable viewport scaling for all three axes */
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_SCALE_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_OFFSET_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_SCALE_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_OFFSET_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_SCALE_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit);
+
+    /* Set up point sizes and min/max values */
+    SETfield(r700->PA_SU_POINT_SIZE.u32All, 0x8, 
+             PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
+    SETfield(r700->PA_SU_POINT_SIZE.u32All, 0x8,
+             PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
+    CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
+    SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
+
+    /* Set up line control */
+       SETfield(r700->PA_SU_LINE_CNTL.u32All, 0x8, 
+             PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);
+       
+    r700->PA_SC_LINE_CNTL.u32All = 0;
+    CLEARbit(r700->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit); 
+    SETbit(r700->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit); 
+
+    /* Set up vertex control */
+    r700->PA_SU_VTX_CNTL.u32All = 0;
+    CLEARfield(r700->PA_SU_VTX_CNTL.u32All, QUANT_MODE_mask); 
+    SETbit(r700->PA_SU_VTX_CNTL.u32All, PIX_CENTER_bit); 
+    SETfield(r700->PA_SU_VTX_CNTL.u32All, X_ROUND_TO_EVEN, 
+             PA_SU_VTX_CNTL__ROUND_MODE_shift, PA_SU_VTX_CNTL__ROUND_MODE_mask); 
+
+    /* to 1.0 = no guard band */ 
+    r700->PA_CL_GB_VERT_CLIP_ADJ.u32All  = 0x3F800000;  /* 1.0 */
+    r700->PA_CL_GB_VERT_DISC_ADJ.u32All  = 0x3F800000;  
+    r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All  = 0x3F800000;  
+    r700->PA_CL_GB_HORZ_DISC_ADJ.u32All  = 0x3F800000;  
+
+    /* Disable color compares */
+    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS, 
+             CLRCMP_FCN_SRC_shift, CLRCMP_FCN_SRC_mask); 
+    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS, 
+             CLRCMP_FCN_DST_shift, CLRCMP_FCN_DST_mask); 
+    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_SEL_SRC, 
+             CLRCMP_FCN_SEL_shift, CLRCMP_FCN_SEL_mask); 
+
+    /* Zero out source */
+    r700->CB_CLRCMP_SRC.u32All = 0x00000000;
+
+    /* Put a compare color in for error checking */
+    r700->CB_CLRCMP_DST.u32All = 0x000000FF;
+
+    /* Set up color compare mask */
+    r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
+
+    /* Enable all samples for multi-sample anti-aliasing */
+    r700->PA_SC_AA_MASK.u32All = 0xFFFFFFFF;
+    /* Turn off AA */
+    r700->PA_SC_AA_CONFIG.u32All = 0;
+
+    SETfield(r700->VGT_OUT_DEALLOC_CNTL.u32All, 16, DEALLOC_DIST_shift, DEALLOC_DIST_mask);
+    SETfield(r700->VGT_VERTEX_REUSE_BLOCK_CNTL.u32All, 14, VTX_REUSE_DEPTH_shift, VTX_REUSE_DEPTH_mask);
+
+    r700->SX_MISC.u32All = 0;
+
+    /* depth buf */ 
+       r700->DB_DEPTH_SIZE.u32All = 0;
+       SETfield(r700->DB_DEPTH_SIZE.u32All, (context->screen->depthBuffer.pitch/8)-1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(r700->DB_DEPTH_SIZE.u32All, ( (context->screen->depthBuffer.size / context->screen->cpp)/64 )-1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */
+    r700->DB_DEPTH_BASE.u32All = context->screen->depthBuffer.gpu >> 8;
+    r700->DB_DEPTH_INFO.u32All = 0;
+    if(4 == context->screen->cpp) /* TODO : in screen create, give z its own format alloc. */
+    {
+        switch (ctx->Visual.depthBits) 
+        {
+        case 16:           /* falls through: with cpp == 4, 16-bit depth also selects DEPTH_8_24 */
+        case 24:
+            SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24, 
+                     DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);       
+            break;
+        default:
+            fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
+                ctx->Visual.depthBits);
+            _mesa_exit(-1);
+        }
+    }
+    else
+    {
+        SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16, 
+                     DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);          
+    } 
+    SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1, 
+             DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask); 
+    /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer size may be much bigger than what is needed, so use the actually used h. */
+    r700->DB_DEPTH_CONTROL.u32All   = 0;
+    r700->DB_DEPTH_CLEAR.u32All     = 0x3F800000;
+    r700->DB_DEPTH_VIEW.u32All      = 0;
+    r700->DB_RENDER_CONTROL.u32All  = 0;
+    r700->DB_RENDER_OVERRIDE.u32All = 0;
+    SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); 
+    SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); 
+    SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); 
+    
+    /* color buffer: always programmed from screen->frontBuffer here */ 
+    SETfield(r700->CB_COLOR0_SIZE.u32All, (context->screen->frontBuffer.pitch/8)-1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(r700->CB_COLOR0_SIZE.u32All, ( (context->screen->frontBuffer.size / context->screen->cpp)/64 )-1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+    r700->CB_COLOR0_BASE.u32All = context->screen->frontBuffer.gpu >> 8;
+    SETfield(r700->CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+    SETfield(r700->CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL, 
+             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); 
+    if(4 == context->screen->cpp) /* 4 bytes per pixel -> COLOR_8_8_8_8, otherwise COLOR_5_6_5 */
+    {
+        SETfield(r700->CB_COLOR0_INFO.u32All, COLOR_8_8_8_8,
+                 CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+        SETfield(r700->CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask);
+    }
+    else
+    {
+        SETfield(r700->CB_COLOR0_INFO.u32All, COLOR_5_6_5,
+                 CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+        SETfield(r700->CB_COLOR0_INFO.u32All, SWAP_ALT_REV, 
+                 COMP_SWAP_shift, COMP_SWAP_mask);        
+    } 
+    SETbit(r700->CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+    SETbit(r700->CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit);
+    SETfield(r700->CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+    r700->CB_COLOR0_VIEW.u32All   = 0;
+    r700->CB_COLOR0_TILE.u32All   = 0;
+    r700->CB_COLOR0_FRAG.u32All   = 0;
+    r700->CB_COLOR0_MASK.u32All   = 0;
+
+       r700->PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000;
+#endif /* to be enabled */
+}
+
+void r700InitStateFuncs(struct dd_function_table *functions) /* Install this file's r700 state handlers into the given driver function table. */
+{
+       functions->UpdateState = r700InvalidateState;
+       functions->AlphaFunc = r700AlphaFunc;
+       functions->BlendColor = r700BlendColor;
+       functions->BlendEquationSeparate = r700BlendEquationSeparate;
+       functions->BlendFuncSeparate = r700BlendFuncSeparate;
+       functions->Enable = r700Enable;
+       functions->ColorMask = r700ColorMask;
+       functions->DepthFunc = r700DepthFunc;
+       functions->DepthMask = r700DepthMask;
+       functions->CullFace = r700CullFace;
+       functions->Fogfv = r700Fogfv;
+       functions->FrontFace = r700FrontFace;
+       functions->ShadeModel = r700ShadeModel;
+
+       /* ARB_point_parameters */
+       functions->PointParameterfv = r700PointParameter;
+
+       /* Stencil related */
+       functions->StencilFuncSeparate = r700StencilFuncSeparate;
+       functions->StencilMaskSeparate = r700StencilMaskSeparate;
+       functions->StencilOpSeparate = r700StencilOpSeparate;
+
+       /* Viewport related */
+       functions->Viewport = r700Viewport;
+       functions->DepthRange = r700DepthRange;
+       functions->PointSize = r700PointSize;
+       functions->LineWidth = r700LineWidth;
+
+       functions->PolygonOffset = r700PolygonOffset;
+       functions->PolygonMode = r700PolygonMode;
+
+       functions->RenderMode = r700RenderMode;
+
+       functions->ClipPlane = r700ClipPlane;
+
+    functions->Scissor = r700Scissor;
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h
new file mode 100644 (file)
index 0000000..b9542f4
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_STATE_H
+#define _R700_STATE_H
+
+#include "main/mtypes.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+
+extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state);
+extern void r700UpdateShaders (GLcontext * ctx);
+
+extern void r700UpdateViewportOffset(GLcontext * ctx);
+extern void r700UpdateDrawBuffer (GLcontext * ctx);
+
+extern void r700InitState (GLcontext * ctx);
+extern void r700InitStateFuncs (struct dd_function_table *functions);
+
+extern void r700SetDefaultStates(context_t * context);
+
+#endif /* _R700_STATE_H */
diff --git a/src/mesa/drivers/dri/r600/r700_tex.c b/src/mesa/drivers/dri/r600/r700_tex.c
new file mode 100644 (file)
index 0000000..75300d2
--- /dev/null
@@ -0,0 +1,1555 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "texmem.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/macros.h"
+#include "xmlpool.h"
+
+#include "radeon_common.h"
+
+#include "r600_context.h"
+#include "r700_chip.h"
+
+#if 0 /* to be enabled */
+#include "r700_state.h"
+#endif /* to be enabled */
+
+#include "r700_tex.h"
+
+/* Report the size, in bytes, of the driver-private texture object (r700TexObj). */
+GLuint r700GetTexObjSize(void)
+{
+    const GLuint texObjBytes = sizeof(r700TexObj);
+
+    return texObjBytes;
+}
+
+/*
+ * Placeholder, to be enabled: the body is intentionally empty, so calling
+ * this function currently has no effect.
+ */
+void r700SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
+                      __DRIdrawable *dPriv)
+{
+    /* Nothing is implemented yet; mark the arguments as deliberately unused. */
+    (void) pDRICtx;
+    (void) target;
+    (void) dPriv;
+}
+
+/*
+ * Placeholder, to be enabled: the body is intentionally empty, so calling
+ * this function currently has no effect.
+ */
+void r700SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+                       GLint format, __DRIdrawable *dPriv)
+{
+    /* Nothing is implemented yet; mark the arguments as deliberately unused. */
+    (void) pDRICtx;
+    (void) target;
+    (void) format;
+    (void) dPriv;
+}
+
+/*
+ * Placeholder, to be enabled: the body is intentionally empty, so calling
+ * this function currently has no effect.
+ */
+void r700SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+                      unsigned long long offset, GLint depth,
+                      GLuint pitch)
+{
+    /* Nothing is implemented yet; mark the arguments as deliberately unused. */
+    (void) pDRICtx;
+    (void) texname;
+    (void) offset;
+    (void) depth;
+    (void) pitch;
+}
+
+#if 0 /* to be enabled */
+/* Write the hardware texel format into the DATA_FORMAT field of SQ_TEX_RESOURCE1. */
+static void r700SetTexDataFormat(r700TexObjPtr t, GLuint dataFormat)
+{
+    SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, dataFormat,
+             SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+}
+
+/*
+ * OR the four destination-channel selects into SQ_TEX_RESOURCE4.
+ * The DST_SEL_* fields must already have been cleared by the caller.
+ */
+static void r700SetTexDstSel(r700TexObjPtr t,
+                             GLuint selX, GLuint selY, GLuint selZ, GLuint selW)
+{
+    t->texture_state.SQ_TEX_RESOURCE4.u32All |=
+               (selX << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
+              |(selY << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
+              |(selZ << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
+              |(selW << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+}
+
+/*
+ * Translate a Mesa texture format into the texture resource words kept in
+ * the driver texture object (tObj->DriverData).
+ *
+ * tObj         texture object whose r700TexObj state is updated
+ * mesa_format  MESA_FORMAT_* value to translate
+ *
+ * The DST_SEL_X/Y/Z/W fields of SQ_TEX_RESOURCE4 are always cleared first.
+ * For a supported format the DATA_FORMAT field of SQ_TEX_RESOURCE1 and the
+ * four DST_SEL fields are programmed and GL_TRUE is returned.  GL_FALSE is
+ * returned for formats that are not handled, including the DXT formats
+ * (not supported yet) and depth formats whose tObj->DepthMode is not
+ * GL_LUMINANCE, GL_INTENSITY or GL_ALPHA.
+ */
+static GLboolean r700GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_format)
+{
+    r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData;
+
+    t->texture_state.SQ_TEX_RESOURCE4.u32All &= ~( SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask
+                                                  |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask
+                                                  |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask
+                                                  |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask );
+
+    switch (mesa_format) /* This is mesa format. */
+    {
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_RGBA8888_REV:
+        /* NOTE(review): both byte orders use the same swizzle here, unlike the
+         * other *_REV pairs below - confirm this is intended. */
+        r700SetTexDataFormat(t, FMT_8_8_8_8);
+        r700SetTexDstSel(t, SQ_SEL_W, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_ARGB8888:
+        r700SetTexDataFormat(t, FMT_8_8_8_8);
+        r700SetTexDstSel(t, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X, SQ_SEL_W);
+        break;
+    case MESA_FORMAT_ARGB8888_REV:
+        r700SetTexDataFormat(t, FMT_8_8_8_8);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_W);
+        break;
+    case MESA_FORMAT_RGB888:
+        r700SetTexDataFormat(t, FMT_8_8_8);
+        r700SetTexDstSel(t, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_RGB565:
+        r700SetTexDataFormat(t, FMT_5_6_5);
+        r700SetTexDstSel(t, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_RGB565_REV:
+        r700SetTexDataFormat(t, FMT_5_6_5);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_ARGB4444:
+        r700SetTexDataFormat(t, FMT_4_4_4_4);
+        r700SetTexDstSel(t, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X, SQ_SEL_W);
+        break;
+    case MESA_FORMAT_ARGB4444_REV:
+        r700SetTexDataFormat(t, FMT_4_4_4_4);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_W);
+        break;
+    case MESA_FORMAT_ARGB1555:
+        r700SetTexDataFormat(t, FMT_1_5_5_5);
+        r700SetTexDstSel(t, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X, SQ_SEL_W);
+        break;
+    case MESA_FORMAT_ARGB1555_REV:
+        r700SetTexDataFormat(t, FMT_1_5_5_5);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_W);
+        break;
+    case MESA_FORMAT_AL88:
+    case MESA_FORMAT_AL88_REV: /* TODO : Check this. */
+        r700SetTexDataFormat(t, FMT_8_8);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_Y);
+        break;
+    case MESA_FORMAT_RGB332:
+        r700SetTexDataFormat(t, FMT_3_3_2);
+        r700SetTexDstSel(t, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */
+        r700SetTexDataFormat(t, FMT_8);
+        r700SetTexDstSel(t, SQ_SEL_0, SQ_SEL_0, SQ_SEL_0, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_L8: /* X, X, X, ONE */
+        r700SetTexDataFormat(t, FMT_8);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_I8: /* X, X, X, X */
+    case MESA_FORMAT_CI8:
+        r700SetTexDataFormat(t, FMT_8);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
+        break;
+    /* TODO: MESA_FORMAT_YCBCR (YUV422) and MESA_FORMAT_YCBCR_REV (VUY422)
+     * still need conversion; noted as X, Y, Z, ONE, G8R8_G8B8. */
+    case MESA_FORMAT_RGB_DXT1:  /* not supported yet */
+    case MESA_FORMAT_RGBA_DXT1: /* not supported yet */
+    case MESA_FORMAT_RGBA_DXT3: /* not supported yet */
+    case MESA_FORMAT_RGBA_DXT5: /* not supported yet */
+        /* Nothing is programmed for these, so report them as unsupported
+         * instead of claiming success with a stale DATA_FORMAT. */
+        return GL_FALSE;
+    case MESA_FORMAT_RGBA_FLOAT32:
+        r700SetTexDataFormat(t, FMT_32_32_32_32_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_W, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_RGBA_FLOAT16:
+        r700SetTexDataFormat(t, FMT_16_16_16_16_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_W, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */
+        r700SetTexDataFormat(t, FMT_32_32_32_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_RGB_FLOAT16:
+        r700SetTexDataFormat(t, FMT_16_16_16_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_Z, SQ_SEL_Y, SQ_SEL_X, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */
+        r700SetTexDataFormat(t, FMT_32_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_0, SQ_SEL_0, SQ_SEL_0, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */
+        r700SetTexDataFormat(t, FMT_16_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_0, SQ_SEL_0, SQ_SEL_0, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */
+        r700SetTexDataFormat(t, FMT_32_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */
+        r700SetTexDataFormat(t, FMT_16_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_1);
+        break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+        r700SetTexDataFormat(t, FMT_32_32_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+        r700SetTexDataFormat(t, FMT_16_16_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+        r700SetTexDataFormat(t, FMT_32_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+        r700SetTexDataFormat(t, FMT_16_FLOAT);
+        r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
+        break;
+    case MESA_FORMAT_Z16:
+    case MESA_FORMAT_Z24_S8:
+    case MESA_FORMAT_Z32:
+        /* The data format is written before DepthMode is examined, so it is
+         * updated even when the depth mode turns out to be unsupported. */
+        if (MESA_FORMAT_Z16 == mesa_format)
+        {
+            r700SetTexDataFormat(t, FMT_16);
+        }
+        else if (MESA_FORMAT_Z24_S8 == mesa_format)
+        {
+            r700SetTexDataFormat(t, FMT_24_8);
+        }
+        else
+        {
+            r700SetTexDataFormat(t, FMT_32);
+        }
+
+        switch (tObj->DepthMode)
+        {
+        case GL_LUMINANCE:  /* X, X, X, ONE */
+            r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_1);
+            break;
+        case GL_INTENSITY:  /* X, X, X, X */
+            r700SetTexDstSel(t, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
+            break;
+        case GL_ALPHA:      /* ZERO, ZERO, ZERO, X */
+            r700SetTexDstSel(t, SQ_SEL_0, SQ_SEL_0, SQ_SEL_0, SQ_SEL_X);
+            break;
+        default:
+            return GL_FALSE;
+        }
+        break;
+    default:
+        /* Not supported format */
+        return GL_FALSE;
+    }
+    return GL_TRUE;
+}
+
+/*
+ * Record where one mipmap image lives inside the texture's memory block and
+ * advance the running byte offset past it.
+ *
+ * tObj       texture object; the layout is stored in its r700TexObj
+ *            (tObj->DriverData)
+ * face       index under which the results are stored in the per-face tables
+ * level      hardware level, i.e. relative to t->base.firstLevel
+ * curOffset  in/out running byte offset; aligned up for levels 0 and 1 and
+ *            then incremented by the image size
+ *
+ * Returns without modifying anything when the image for that level is missing.
+ * The image is always looked up on face 0 (NOTE(review): presumably because
+ * all cube faces share their dimensions - confirm).
+ */
+static void compute_tex_image_offset(
+       struct gl_texture_object *tObj,
+       GLuint face,
+       GLint level,
+       GLint* curOffset)
+{
+    r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData;
+    const struct gl_texture_image* texImage;
+    GLuint texelBytes;
+    GLuint size = 0; /* stays 0 for compressed images, which are not supported yet */
+    GLuint pitch;
+
+    texImage = tObj->Image[0][level + t->base.firstLevel];
+    if (!texImage)
+    {
+           return;
+    }
+
+    texelBytes = texImage->TexFormat->TexelBytes;
+
+    /* Default pitch: image width rounded up to the texel pitch alignment. */
+    pitch = (texImage->Width + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+    /* find image size in bytes */
+    if (texImage->IsCompressed)
+    {
+        /* not supported yet: size was never computed here, so it is kept at 0
+         * and the assert below fires instead of reading an indeterminate value */
+    }
+    else
+    {
+        const GLboolean isRect = (tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+        /* Byte alignment of the base level pitch: 64 for rectangles, 32 otherwise. */
+        const GLuint baseAlignMask = isRect ? 63 : 31;
+
+        if( (ARRAY_LINEAR_ALIGNED << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)
+            == (t->texture_state.SQ_TEX_RESOURCE0.u32All & SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask) )
+        {
+            /* Linear aligned surfaces use a 256 byte aligned row. */
+            pitch = (texImage->Width * texelBytes + 255) & ~255;
+        }
+        else if(0 == level)
+        {
+            pitch = (pitch * texelBytes + baseAlignMask) & ~baseAlignMask;
+        }
+        else
+        {
+            pitch = texImage->Width * texelBytes;
+        }
+
+        /* pitch is in bytes at this point */
+        size = pitch * texImage->Height;
+        if (!isRect)
+        {
+            size *= texImage->Depth;
+        }
+
+        /* back to texels for the per-level table */
+        pitch /= texelBytes;
+    }
+    assert(size > 0);
+
+    if( (0 == level) || (1 == level) ) /* 0 for BASE_ADDRESS, 1 for MIP_ADDRESS */
+    {
+        *curOffset = (*curOffset + R700_TEXTURE_ALIGNMENT_MASK) & ~R700_TEXTURE_ALIGNMENT_MASK;
+    }
+
+    if (texelBytes)
+    {
+        /* fix x and y coords up later together with offset */
+        t->texel_pitch[face][level]        = pitch;
+        t->level_offset[face][level]       = *curOffset;
+        t->byte_per_texel                  = texelBytes;
+        t->src_width_in_pexel[face][level] = texImage->Width;
+        t->src_hight_in_pexel[face][level] = texImage->Height;
+    }
+    else
+    {
+        /* Do it like one byte texel. */
+        pitch = (size + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+        t->texel_pitch[face][level]        = pitch;
+        t->level_offset[face][level]       = *curOffset;
+        t->byte_per_texel                  = 1;
+        t->src_width_in_pexel[face][level] = size;
+        t->src_hight_in_pexel[face][level] = 1;
+    }
+
+    *curOffset += size;
+}
+#endif /* to be enabled */
+/*
+ * Tear-down hook for a driver texture object.  Currently a no-op: the body
+ * contains only the commented-out sketch below.
+ *
+ * NOTE(review): context is taken by value here, whereas the other functions
+ * in this file take a context_t pointer - confirm against the prototype in
+ * r700_tex.h.
+ */
+void r700DestroyTexObj(context_t context, r700TexObjPtr t) 
+{
+    /* TODO : nuke r700 chip texture and sampler pointer. */
+    //int i;
+
+    //for (i = 0; i < rmesa->ctx->Const.MaxTextureUnits; i++) 
+    //{
+        //if (rmesa->state.texture.unit[i].texobj == t) {
+        //          rmesa->state.texture.unit[i].texobj = NULL;
+        //}
+    //}
+}
+#if 0 /* to be enabled */
+/*
+ * Derive the hardware texture state for a texture object: texel format,
+ * dimensionality, the layout of every mipmap level inside the texture memory
+ * block, and the pitch/width/height fields of the resource words.
+ *
+ * context  driver context (not used by this function)
+ * tObj     texture object; its r700TexObj (tObj->DriverData) is updated
+ *
+ * Reports the problem through _mesa_problem() and returns early when the
+ * texel format or the texture target is not handled.
+ */
+static void r700SetTexImages(context_t *context, struct gl_texture_object *tObj)
+{
+    r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData;
+    const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+    const struct gl_texture_image *firstImage;
+    GLint curOffset;
+    GLint i;
+    GLint numLevels;
+    GLuint uTexelPitch;
+
+    if (!t->image_override)
+    {
+        if(GL_FALSE == r700GetTexFormat(tObj, baseImage->TexFormat->MesaFormat) )
+        {
+            _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+            return;
+        }
+    }
+
+    /* Compute which mipmap levels we really want to send to the hardware.
+     * This has to happen before anything reads t->base.firstLevel; the 3D
+     * case below uses it to fetch the depth.
+     */
+    driCalculateTextureFirstLastLevel((driTextureObject *) t);
+    firstImage = tObj->Image[0][t->base.firstLevel];
+
+    switch (tObj->Target)
+    {
+        case GL_TEXTURE_1D:
+            SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_1D, DIM_shift, DIM_mask);
+            SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+            break;
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_RECTANGLE_NV:
+            SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_2D, DIM_shift, DIM_mask);
+            SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+            break;
+        case GL_TEXTURE_3D:
+            SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_3D, DIM_shift, DIM_mask);
+            SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, firstImage->Depth - 1,
+                     TEX_DEPTH_shift, TEX_DEPTH_mask);
+            break;
+        case GL_TEXTURE_CUBE_MAP:
+            SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_CUBEMAP, DIM_shift, DIM_mask);
+            SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+            break;
+        default:
+            _mesa_problem(NULL, "unexpected texture target type in %s", __FUNCTION__);
+            return;
+    }
+
+    numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+    assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+
+    /* Calculate mipmap offsets and dimensions for blitting (uploading)
+     * The idea is that we lay out the mipmap levels within a block of
+     * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+     */
+    t->tile_bits = 0;
+    curOffset = 0;
+    if (tObj->Target == GL_TEXTURE_CUBE_MAP)
+    {
+        ASSERT(firstImage->WidthLog2 == firstImage->HeightLog2);
+
+        for(i = 0; i < numLevels; i++)
+        {
+            /* i is hw level */
+            GLuint face;
+            for(face = 0; face < 6; face++)
+            {
+                compute_tex_image_offset(tObj, face, i, &curOffset);
+            }
+        }
+    }
+    else
+    {
+        for (i = 0; i < numLevels; i++)
+        {
+            /* i is hw level */
+            compute_tex_image_offset(tObj, 0, i, &curOffset);
+        }
+    }
+
+    /* Align the total size of texture memory block.
+     */
+    t->base.totalSize = (curOffset + R700_TEXTURE_ALIGNMENT_MASK) & ~R700_TEXTURE_ALIGNMENT_MASK;
+    t->pitch = 0;
+    /* TODO : baseImage->IsCompressed, tObj->Target == GL_TEXTURE_RECTANGLE_NV */
+
+    uTexelPitch = (firstImage->Width + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+                 & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+    /* The PITCH field is written as (pitch in texels / 8) - 1; width and height as value - 1. */
+    SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, (uTexelPitch/8)-1, PITCH_shift, PITCH_mask);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, firstImage->Width  - 1,
+             TEX_WIDTH_shift, TEX_WIDTH_mask);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, firstImage->Height - 1,
+             TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+}
+
+/*
+ * Upload one mipmap image of one face to its slot in the texture's memory
+ * block through the chip object's LoadMemSurf hook.
+ *
+ * context  driver context providing chipobj.LoadMemSurf
+ * t        driver texture object holding the per-level layout tables
+ * hwlevel  level relative to the first real level (t->base.firstLevel)
+ * x, y     not used by this function
+ * face     face index, expected to be below 6
+ *
+ * Nothing is uploaded when hwlevel is out of range (reported through
+ * _mesa_problem), when the image or its data is missing, for rectangle
+ * textures, or for client-data images; the last two are still TODO.
+ */
+static void r700UploadSubImage(context_t    *context,
+                               r700TexObjPtr t,
+                               GLint         hwlevel, /* relative level to first real level. */
+                               GLint         x,
+                               GLint         y,
+                               GLuint        face)
+{
+    const struct gl_texture_image *srcImage;
+    const int mesaLevel = hwlevel + t->base.firstLevel;
+    GLuint gpuAddr;
+    unsigned char *srcBytes;
+
+    ASSERT(face < 6);
+
+    /* Ensure we have a valid texture to upload */
+    if (!((hwlevel >= 0) && (hwlevel < RADEON_MAX_TEXTURE_LEVELS)))
+    {
+        _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+        return;
+    }
+
+    srcImage = t->base.tObj->Image[face][mesaLevel];
+    if (!srcImage || !srcImage->Data)
+    {
+        return;
+    }
+
+    /* TODO :
+     * rectangle textures:
+     *     assert(level == 0);
+     *     assert(hwlevel == 0);
+     *     r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
+     * client data images:
+     *     r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
+     *                                  width, height);
+     */
+    if ((t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) || srcImage->IsClientData)
+    {
+        return;
+    }
+
+    /* use hwlevel for hwsurf. */
+    gpuAddr  = t->bufAddr + t->level_offset[face][hwlevel];
+    srcBytes = (unsigned char*)(srcImage->Data);
+
+    (context->chipobj.LoadMemSurf)(context,
+                         gpuAddr, /* gpu addr */
+                         t->texel_pitch[face][hwlevel], /* dst_pitch_in_pixel */
+                         t->src_width_in_pexel[face][hwlevel], /*src_width_in_pixel */
+                         t->src_hight_in_pexel[face][hwlevel], /* height */
+                         t->byte_per_texel, /* byte_per_pixel */
+                         srcBytes);  /* source data */
+}
+
+/*
+ * Make sure the texture object owns a memory surface, upload the dirty
+ * mipmap levels of the given face, and refresh the address/level fields of
+ * the texture resource words.
+ *
+ * Returns 0 on success or when there is nothing to do (image override or a
+ * total size of zero), and -1 when the surface allocation fails.
+ */
+int r700UploadTexImages(GLcontext * ctx, struct gl_texture_object *tObj, GLuint face)
+{
+    context_t    *context    = R700_CONTEXT(ctx);
+    r700TexObjPtr t          = (r700TexObjPtr) tObj->DriverData;
+    const int     levelCount = t->base.lastLevel - t->base.firstLevel + 1;
+    int           heap;
+    int           hwLevel;
+
+    if (t->image_override || (t->base.totalSize == 0))
+    {
+        return 0;
+    }
+
+    /* TODO : LOCK_HARDWARE(&rmesa->radeon); */
+
+    if (NULL == t->base.memBlock)
+    {
+        /* prefered heap on input; AllocMemSurf writes back the heap actually used. */
+        heap = RADEON_LOCAL_TEX_HEAP;
+
+        if (GL_FALSE == (context->chipobj.AllocMemSurf)(context,
+                                                        &(t->base.memBlock),
+                                                        &(t->base.heap),
+                                                        &heap,
+                                                        t->base.totalSize))
+        {
+            /* TODO : UNLOCK_HARDWARE(&rmesa->radeon); */
+            return -1;
+        }
+
+        /* Set the base offset of the texture image */
+        t->bufAddr = context->screen->texOffset[heap] + t->base.memBlock->ofs;
+        t->offset  = t->bufAddr;
+
+        /*
+        if (!(t->base.tObj->Image[0][0]->IsClientData))
+        {
+            t->offset |= t->tile_bits;
+        }
+        */
+    }
+
+    /* Let the world know we've used this memory recently. */
+    driUpdateTextureLRU((driTextureObject *) t);
+
+    /* TODO : UNLOCK_HARDWARE(&rmesa->radeon); */
+
+    /* Upload any images that are new */
+    if (t->my_dirty_images[face] != 0)
+    {
+        for (hwLevel = 0; hwLevel < levelCount; hwLevel++)
+        {
+            /* Dirty bits are indexed by Mesa level, uploads by hw level. */
+            const int mesaLevel = hwLevel + t->base.firstLevel;
+
+            if (t->my_dirty_images[face] & (1 << mesaLevel))
+            {
+                r700UploadSubImage(context, t, hwLevel, 0, 0, face);
+            }
+        }
+
+        t->base.dirty_images[face] = 0;
+        t->my_dirty_images[face]   = 0;
+    }
+
+    /* TODO : 3D, CUBE */
+    t->texture_state.SQ_TEX_RESOURCE2.u32All = t->bufAddr / 256;
+
+    if (levelCount > 1)
+    {
+        /* MIP_ADDRESS */
+        t->texture_state.SQ_TEX_RESOURCE3.u32All = (t->bufAddr + t->level_offset[0][1]) / 256;
+
+        SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, t->base.firstLevel,
+                 BASE_LEVEL_shift, BASE_LEVEL_mask);
+        SETfield(t->texture_state.SQ_TEX_RESOURCE5.u32All, t->base.lastLevel,
+                 LAST_LEVEL_shift, LAST_LEVEL_mask);
+    }
+
+    return 0;
+}
+/*
+ * Prepare the 1D/2D texture currently bound to `unit`.
+ *
+ * When face 0 has dirty images, r700SetTexImages() and r700UploadTexImages()
+ * are run.  Returns GL_FALSE if the object afterwards has neither a memory
+ * block nor an image override, GL_TRUE otherwise.
+ */
+static GLboolean r700EnableTexture2D(GLcontext * ctx, int unit)
+{
+    context_t                *context = R700_CONTEXT(ctx);
+    struct gl_texture_object *tObj    = ctx->Texture.Unit[unit]._Current;
+    r700TexObjPtr             t       = (r700TexObjPtr) tObj->DriverData;
+
+    ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+
+    /* Clean texture: nothing to recompute or upload. */
+    if (!t->base.dirty_images[0])
+    {
+        return GL_TRUE;
+    }
+
+    r700SetTexImages(context, tObj);
+    r700UploadTexImages(ctx, tObj, 0);
+
+    return (t->base.memBlock || t->image_override) ? GL_TRUE : GL_FALSE;
+}
+
+/*
+ * Try to find an 8888 texture format for which storing the client data only
+ * needs a memcpy.
+ *
+ * srcFormat/srcType are the client pixel format and type.  The choice depends
+ * on host byte order for GL_UNSIGNED_BYTE data.  Any combination not listed
+ * falls back to _dri_texformat_argb8888.
+ *
+ * Every candidate is a const format table, so the function returns directly
+ * instead of going through a non-const local pointer.
+ */
+static const struct gl_texture_format *r700Choose8888TexFormat(GLenum srcFormat,
+                                                              GLenum srcType)
+{
+    /* Run-time endianness probe: first byte of the integer 1. */
+    const GLuint ui = 1;
+    const GLubyte littleEndian = *((const GLubyte *)&ui);
+
+    if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+        (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+        (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+        (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian))
+    {
+        return &_mesa_texformat_rgba8888;
+    }
+
+    if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+        (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+        (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+        (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian))
+    {
+        return &_mesa_texformat_rgba8888_rev;
+    }
+
+    if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+                                 srcType == GL_UNSIGNED_INT_8_8_8_8))
+    {
+        return &_mesa_texformat_argb8888_rev;
+    }
+
+    if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+                                 srcType == GL_UNSIGNED_INT_8_8_8_8_REV))
+    {
+        return &_mesa_texformat_argb8888;
+    }
+
+    return _dri_texformat_argb8888;
+}
+
+#endif /* to be enabled */
+
+/*
+ * Allocate a zeroed driver texture object and attach it to texObj.
+ *
+ * texObj->DriverData is assigned the result even when the allocation fails,
+ * in which case both it and the return value are NULL.  The default register
+ * state set-up below is currently compiled out.
+ */
+static r700TexObjPtr r700AllocTexObj(struct gl_texture_object *texObj)
+{
+    r700TexObjPtr t = CALLOC_STRUCT(r700_tex_obj);
+
+    texObj->DriverData = t;
+    if (t == NULL)
+    {
+        return NULL;
+    }
+
+#if 0 /* to be enabled */
+    /* Initialize non-image-dependent parts of the state: */
+    t->base.tObj       = texObj;
+    t->border_fallback = GL_FALSE;
+
+    make_empty_list(&t->base);
+
+    /* Init text object to default states. */
+
+    /* Resource word 0: 2D, linear general tiling. */
+    t->texture_state.SQ_TEX_RESOURCE0.u32All = 0;
+    SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, SQ_TEX_DIM_2D,
+             DIM_shift, DIM_mask);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE0.u32All, ARRAY_LINEAR_GENERAL,
+             SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+             SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+    CLEARbit(t->texture_state.SQ_TEX_RESOURCE0.u32All, TILE_TYPE_bit);
+
+    /* Resource word 1: 8888 data format. */
+    t->texture_state.SQ_TEX_RESOURCE1.u32All = 0;
+    SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, FMT_8_8_8_8,
+             SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift,
+             SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+    /* Resource words 2 and 3 start out as zero. */
+    t->texture_state.SQ_TEX_RESOURCE2.u32All = 0;
+    t->texture_state.SQ_TEX_RESOURCE3.u32All = 0;
+
+    /* Resource word 4: component formats, swizzle, base level. */
+    t->texture_state.SQ_TEX_RESOURCE4.u32All = 0;
+    SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_FORMAT_COMP_UNSIGNED,
+             FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_FORMAT_COMP_UNSIGNED,
+             FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_FORMAT_COMP_UNSIGNED,
+             FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_FORMAT_COMP_UNSIGNED,
+             FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_NUM_FORMAT_NORM,
+             SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift,
+             SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask);
+    CLEARbit(t->texture_state.SQ_TEX_RESOURCE4.u32All,
+             SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit);
+    CLEARbit(t->texture_state.SQ_TEX_RESOURCE4.u32All,
+             SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, SQ_ENDIAN_NONE,
+             SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift,
+             SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, 1,
+             REQUEST_SIZE_shift, REQUEST_SIZE_mask);
+    t->texture_state.SQ_TEX_RESOURCE4.u32All |=
+          (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift)
+        | (SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift)
+        | (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift)
+        | (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+    SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, 0,
+             BASE_LEVEL_shift, BASE_LEVEL_mask); /* mip-maps */
+
+    /* Resource word 5 starts out as zero. */
+    t->texture_state.SQ_TEX_RESOURCE5.u32All = 0;
+
+    /* Resource word 6: mark the resource as a valid texture. */
+    t->texture_state.SQ_TEX_RESOURCE6.u32All = 0;
+    SETfield(t->texture_state.SQ_TEX_RESOURCE6.u32All, SQ_TEX_VTX_VALID_TEXTURE,
+             SQ_TEX_RESOURCE_WORD6_0__TYPE_shift,
+             SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+    /* Initialize sampler registers */
+    t->sampler_state.SQ_TEX_SAMPLER0.u32All =
+          (SQ_TEX_WRAP << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)
+        | (SQ_TEX_WRAP << CLAMP_Y_shift)
+        | (SQ_TEX_WRAP << CLAMP_Z_shift)
+        | (SQ_TEX_XY_FILTER_POINT << XY_MAG_FILTER_shift)
+        | (SQ_TEX_XY_FILTER_POINT << XY_MIN_FILTER_shift)
+        | (SQ_TEX_Z_FILTER_NONE << Z_FILTER_shift)
+        | (SQ_TEX_Z_FILTER_NONE << MIP_FILTER_shift)
+        | (SQ_TEX_BORDER_COLOR_TRANS_BLACK << BORDER_COLOR_TYPE_shift);
+
+    t->sampler_state.SQ_TEX_SAMPLER1.u32All = 0x7FF << MAX_LOD_shift;
+
+    t->sampler_state.SQ_TEX_SAMPLER2.u32All = 0;
+    SETbit(t->sampler_state.SQ_TEX_SAMPLER2.u32All, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
+#endif /* to be enabled */
+
+    return t;
+}
+
+/*
+ * Decide whether the client's pixel buffer can be used directly as the
+ * texture image storage.
+ *
+ * Returns 1 and points texImage->Data at `pixels` when the
+ * internalFormat/format/type combination is one of the supported ones and the
+ * unpack state needs no conversion; returns 0 otherwise.  Note that
+ * texImage->TexFormat may already have been assigned when the later packing
+ * check rejects the image.
+ */
+static GLboolean
+r700ValidateClientStorage(GLcontext * ctx, GLenum target,
+                         GLint internalFormat,
+                         GLint srcWidth, GLint srcHeight,
+                         GLenum format, GLenum type, const void *pixels,
+                         const struct gl_pixelstore_attrib *packing,
+                         struct gl_texture_object *texObj,
+                         struct gl_texture_image *texImage)
+{
+    GLint rowStrideBytes;
+
+    /* Client storage not requested, or a conversion would be needed. */
+    if (!ctx->Unpack.ClientStorage ||
+        ctx->_ImageTransferState ||
+        texImage->IsCompressed ||
+        texObj->GenerateMipmap)
+    {
+        return GL_FALSE;
+    }
+
+    /* This list is incomplete, may be different on ppc??? */
+    switch (internalFormat)
+    {
+    case GL_RGBA:
+        if (!(format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV))
+        {
+            return GL_FALSE;
+        }
+        texImage->TexFormat = _dri_texformat_argb8888;
+        break;
+
+    case GL_RGB:
+        if (!(format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5))
+        {
+            return GL_FALSE;
+        }
+        texImage->TexFormat = _dri_texformat_rgb565;
+        break;
+
+    case GL_YCBCR_MESA:
+        if (format != GL_YCBCR_MESA)
+        {
+            return GL_FALSE;
+        }
+        if (type == GL_UNSIGNED_SHORT_8_8_REV_APPLE)
+        {
+            texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
+        }
+        else if (type == GL_UNSIGNED_SHORT_8_8_APPLE || type == GL_UNSIGNED_BYTE)
+        {
+            texImage->TexFormat = &_mesa_texformat_ycbcr;
+        }
+        else
+        {
+            return GL_FALSE;
+        }
+        break;
+
+    default:
+        return GL_FALSE;
+    }
+
+    /* Could deal with these packing issues, but currently don't: */
+    if (packing->SkipPixels || packing->SkipRows ||
+        packing->SwapBytes || packing->LsbFirst)
+    {
+        return GL_FALSE;
+    }
+
+    rowStrideBytes = _mesa_image_row_stride(packing, srcWidth, format, type);
+
+    /* Have validated that _mesa_transfer_teximage would be a straight
+     * memcpy at this point.  NOTE: future calls to TexSubImage will
+     * overwrite the client data.  This is explicitly mentioned in the
+     * extension spec.
+     */
+    texImage->Data         = (void *)pixels;
+    texImage->IsClientData = GL_TRUE;
+    texImage->RowStride    = rowStrideBytes / texImage->TexFormat->TexelBytes;
+
+    return GL_TRUE;
+}
+
+/*
+ * TexImage1D driver hook (installed by r700InitTextureFuncs).
+ * Not implemented yet: the body is empty, so the call does nothing.
+ */
+static void r700TexImage1D(GLcontext * ctx, GLenum target, GLint level,
+                          GLint internalFormat,
+                          GLint width, GLint border,
+                          GLenum format, GLenum type, const GLvoid * pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *texObj,
+                          struct gl_texture_image *texImage) 
+{
+}
+
+/*
+ * TexImage2D driver hook (installed by r700InitTextureFuncs); also used for
+ * the six cube-map face targets.  The body is currently compiled out.
+ *
+ * An existing driver object is swapped out; otherwise one is allocated, and
+ * GL_OUT_OF_MEMORY is raised if that fails.  The image is then either
+ * accepted as client storage or stored through _mesa_store_teximage2d(), in
+ * which case the level is marked dirty for the affected face.
+ */
+static void r700TexImage2D(GLcontext * ctx, GLenum target, GLint level,
+                          GLint internalFormat,
+                          GLint width, GLint height, GLint border,
+                          GLenum format, GLenum type, const GLvoid * pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *texObj,
+                          struct gl_texture_image *texImage)
+{
+#if 0 /* to be enabled */
+    /* Two views of the same driver object: r700 specific and generic DRI. */
+    r700TexObjPtr     r700t = (r700TexObjPtr) texObj->DriverData;
+    driTextureObject *t     = (driTextureObject *) texObj->DriverData;
+    GLuint face;
+
+    /* which cube face or ordinary 2D image */
+    switch (target)
+    {
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+        face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+        ASSERT(face < 6);
+        break;
+    default:
+        face = 0;
+    }
+
+    if (t != NULL)
+    {
+        driSwapOutTextureObject(t);
+    }
+    else
+    {
+        /* Refresh BOTH views: r700t is dereferenced below and would
+         * otherwise still be the NULL read from DriverData above.
+         */
+        r700t = r700AllocTexObj(texObj);
+        if (r700t == NULL)
+        {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+            return;
+        }
+        t = (driTextureObject *) r700t;
+    }
+
+    texImage->IsClientData = GL_FALSE;
+
+    if (r700ValidateClientStorage(ctx, target,
+                                 internalFormat,
+                                 width, height,
+                                 format, type, pixels,
+                                 packing, texObj, texImage))
+    {
+        /* client maintained surface */
+    }
+    else
+    {
+        /* Normal path: copy (to cached memory) and eventually upload
+         * via another copy to GART memory and then a blit...  Could
+         * eliminate one copy by going straight to (permanent) GART.
+         *
+         * Note, this will call r700ChooseTextureFormat.
+         */
+        _mesa_store_teximage2d(ctx, target, level, internalFormat,
+                              width, height, border, format, type,
+                              pixels, &ctx->Unpack, texObj, texImage);
+
+        t->dirty_images[face] |= (1 << level);
+
+        /* mesa dirty_images is not correct, so use own one for now, review it later. */
+        r700t->my_dirty_images[face] |= (1 << level);
+    }
+#endif /* to be enabled */
+}
+
+/*
+ * TexImage3D driver hook (installed by r700InitTextureFuncs).
+ * Not implemented yet: the body is empty, so the call does nothing.
+ */
+static void r700TexImage3D(GLcontext * ctx, GLenum target, GLint level,
+                          GLint internalFormat,
+                          GLint width, GLint height, GLint depth,
+                          GLint border,
+                          GLenum format, GLenum type, const GLvoid * pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *texObj,
+                          struct gl_texture_image *texImage) 
+{
+}
+
+/*
+ * TexSubImage1D driver hook (installed by r700InitTextureFuncs).
+ * Not implemented yet: the body is empty, so the call does nothing.
+ */
+static void r700TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+                             GLint xoffset,
+                             GLsizei width,
+                             GLenum format, GLenum type,
+                             const GLvoid * pixels,
+                             const struct gl_pixelstore_attrib *packing,
+                             struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage) 
+{
+
+}
+
+/*
+ * TexSubImage2D driver hook (installed by r700InitTextureFuncs).
+ * Not implemented yet: the body is empty, so the call does nothing.
+ */
+static void r700TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset,
+                             GLsizei width, GLsizei height,
+                             GLenum format, GLenum type,
+                             const GLvoid * pixels,
+                             const struct gl_pixelstore_attrib *packing,
+                             struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage) 
+{
+}
+
+/*
+ * TexSubImage3D driver hook (installed by r700InitTextureFuncs).
+ * Not implemented yet: the body is empty, so the call does nothing.
+ */
+static void r700TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+                 GLint xoffset, GLint yoffset, GLint zoffset,
+                 GLsizei width, GLsizei height, GLsizei depth,
+                 GLenum format, GLenum type,
+                 const GLvoid * pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage) 
+{
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Note: we could use containment here to 'derive' the driver-specific
+ * texture object from the core mesa gl_texture_object.  Not done at this time.
+ *
+ * Returns the new core Mesa object, or NULL if Mesa could not allocate it.
+ * The result of r700AllocTexObj() is not checked here; it stores its own
+ * result (possibly NULL) in obj->DriverData.
+ */
+static struct gl_texture_object *r700NewTextureObject(GLcontext * ctx,
+                                                     GLuint name,
+                                                     GLenum target)
+{
+    struct gl_texture_object *obj = _mesa_new_texture_object(ctx, name, target);
+
+    if (!obj)
+    {
+        return NULL;
+    }
+
+    /* TODO : obj->MaxAnisotropy = context->initialMaxAnisotropy;
+     * (needs context_t *context = R700_CONTEXT(ctx) once enabled).
+     */
+
+    r700AllocTexObj(obj);
+
+    return obj;
+}
+
+/*
+ * BindTexture driver hook.  Performs no state change; it only asserts that
+ * objects bound to the 1D, 2D, 3D, cube-map and rectangle targets already
+ * carry driver data.
+ */
+static void r700BindTexture(GLcontext * ctx, GLenum target,
+                           struct gl_texture_object *texObj)
+{
+    switch (target)
+    {
+    case GL_TEXTURE_1D:
+    case GL_TEXTURE_2D:
+    case GL_TEXTURE_3D:
+    case GL_TEXTURE_CUBE_MAP:
+    case GL_TEXTURE_RECTANGLE_NV:
+        assert(texObj->DriverData != NULL);
+        break;
+    default:
+        break;
+    }
+}
+
+/*
+ * DeleteTexture driver hook (installed by r700InitTextureFuncs).
+ *
+ * Installing this hook replaces Mesa's default deleter, so it must release
+ * both the driver data allocated by r700AllocTexObj() and the core texture
+ * object itself; an empty body leaks both.
+ */
+static void r700DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+    if (texObj->DriverData != NULL)
+    {
+        /* The driver object comes from CALLOC_STRUCT and its driTextureObject
+         * base is not linked into any list yet, so a plain FREE is enough.
+         * TODO : switch to driDestroyTextureObject() once the base object is
+         * initialized in r700AllocTexObj().
+         */
+        FREE(texObj->DriverData);
+        texObj->DriverData = NULL;
+    }
+
+    /* Free mipmap images and the texture object itself */
+    _mesa_delete_texture_object(ctx, texObj);
+}
+
+#if 0 /* to be enabled */
+/*
+ * Program the XY minification filter and the mip filter fields of sampler
+ * word 0 from a GL minification filter enum.  Unknown values leave the
+ * sampler state untouched.
+ */
+static void r700SetTexMinFilter(r700TexObjPtr t, GLenum minf)
+{
+    GLuint xyFilter;
+    GLuint mipFilter;
+
+    /* Split the GL enum into its in-level (XY) and between-level (mip) parts. */
+    switch (minf)
+    {
+    case GL_NEAREST:
+        xyFilter  = TEX_XYFilter_Point;
+        mipFilter = TEX_MipFilter_None;
+        break;
+    case GL_LINEAR:
+        xyFilter  = TEX_XYFilter_Linear;
+        mipFilter = TEX_MipFilter_None;
+        break;
+    case GL_NEAREST_MIPMAP_NEAREST:
+        xyFilter  = TEX_XYFilter_Point;
+        mipFilter = TEX_MipFilter_Point;
+        break;
+    case GL_LINEAR_MIPMAP_NEAREST:
+        xyFilter  = TEX_XYFilter_Linear;
+        mipFilter = TEX_MipFilter_Point;
+        break;
+    case GL_NEAREST_MIPMAP_LINEAR:
+        xyFilter  = TEX_XYFilter_Point;
+        mipFilter = TEX_MipFilter_Linear;
+        break;
+    case GL_LINEAR_MIPMAP_LINEAR:
+        xyFilter  = TEX_XYFilter_Linear;
+        mipFilter = TEX_MipFilter_Linear;
+        break;
+    default:
+        /* no case */
+        return;
+    }
+
+    SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, xyFilter,
+             XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+    SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, mipFilter,
+             MIP_FILTER_shift, MIP_FILTER_mask);
+}
+
+/*
+ * Program the XY magnification filter field of sampler word 0 from a GL
+ * magnification filter enum.  Values other than GL_NEAREST and GL_LINEAR
+ * leave the sampler state untouched.
+ */
+static void r700SetTexMagFilter(r700TexObjPtr t, GLenum magf)
+{
+    if (magf == GL_NEAREST)
+    {
+        SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Point,
+                 XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+    }
+    else if (magf == GL_LINEAR)
+    {
+        SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All, TEX_XYFilter_Linear,
+                 XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+    }
+}
+
+/*
+ * Translate a GL texture wrap mode into the hardware clamp mode value used
+ * for the CLAMP_X/Y/Z sampler fields.
+ *
+ * The mirrored modes follow the same pattern as the plain ones:
+ * "clamp" -> half border, "clamp to edge" -> last texel,
+ * "clamp to border" -> border.
+ *
+ * Reports a Mesa problem and returns 0 for an unknown wrap mode.
+ */
+static unsigned int r700GetWrapMode(GLenum wrapmode)
+{
+    switch(wrapmode)
+    {
+    case GL_REPEAT:
+        return SQ_TEX_WRAP;
+    case GL_CLAMP:
+        return SQ_TEX_CLAMP_HALF_BORDER;
+    case GL_CLAMP_TO_EDGE:
+        return SQ_TEX_CLAMP_LAST_TEXEL;
+    case GL_CLAMP_TO_BORDER:
+        return SQ_TEX_CLAMP_BORDER;
+    case GL_MIRRORED_REPEAT:
+        return SQ_TEX_MIRROR;
+    case GL_MIRROR_CLAMP_EXT:
+        return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
+    case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+        return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+    case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+        return SQ_TEX_MIRROR_ONCE_BORDER;
+    default:
+        _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__);
+        return 0;
+    }
+}
+#endif /* to be enabled */
+
+/*
+ * TexParameter driver hook: mirrors a changed texture parameter into the
+ * object's sampler state.  The handling is currently compiled out, so the
+ * call has no effect.
+ */
+static void r700TexParameter(GLcontext * ctx, GLenum target,
+                            struct gl_texture_object *texObj,
+                            GLenum pname, const GLfloat * params)
+{
+    r700TexObjPtr t = (r700TexObjPtr) texObj->DriverData;
+#if 0 /* to be enabled */
+    const struct gl_texture_image *baseImage;
+
+    switch (pname)
+    {
+    case GL_TEXTURE_MIN_FILTER:
+        r700SetTexMinFilter(t, texObj->MinFilter);
+        return;
+
+    case GL_TEXTURE_MAG_FILTER:
+        r700SetTexMagFilter(t, texObj->MagFilter);
+        return;
+
+    case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+        /* Both filters are re-derived when the anisotropy changes. */
+        r700SetTexMinFilter(t, texObj->MinFilter);
+        r700SetTexMagFilter(t, texObj->MagFilter);
+        return;
+
+    case GL_TEXTURE_WRAP_S:
+        SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All,
+                 r700GetWrapMode(texObj->WrapS),
+                 SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
+                 SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+        return;
+
+    case GL_TEXTURE_WRAP_T:
+        SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All,
+                 r700GetWrapMode(texObj->WrapT),
+                 CLAMP_Y_shift, CLAMP_Y_mask);
+        return;
+
+    case GL_TEXTURE_WRAP_R:
+        SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All,
+                 r700GetWrapMode(texObj->WrapR),
+                 CLAMP_Z_shift, CLAMP_Z_mask);
+        return;
+
+    case GL_TEXTURE_BORDER_COLOR:
+        /* TODO : set border color regs before rendering. */
+        SETfield(t->sampler_state.SQ_TEX_SAMPLER0.u32All,
+                 SQ_TEX_BORDER_COLOR_REGISTER,
+                 BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
+        return;
+
+    case GL_TEXTURE_BASE_LEVEL:
+    case GL_TEXTURE_MAX_LEVEL:
+    case GL_TEXTURE_MIN_LOD:
+    case GL_TEXTURE_MAX_LOD:
+        /* TODO : we do support this, add it later. */
+        driSwapOutTextureObject((driTextureObject *) t);
+        return;
+
+    case GL_DEPTH_TEXTURE_MODE:
+        baseImage = texObj->Image[0][texObj->BaseLevel];
+        if ((baseImage != NULL) &&
+            (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT))
+        {
+            /* TODO : r700SetDepthTexMode(texObj); */
+        }
+        /* If not depth texture, just return. */
+        return;
+
+    default:
+        return;
+    }
+#endif /* to be enabled */
+}
+
+/*
+ * CompressedTexImage2D driver hook (installed by r700InitTextureFuncs).
+ * Not implemented yet: the body is empty, so the call does nothing.
+ */
+static void r700CompressedTexImage2D(GLcontext * ctx, GLenum target,
+                                    GLint level, GLint internalFormat,
+                                    GLint width, GLint height, GLint border,
+                                    GLsizei imageSize, const GLvoid * data,
+                                    struct gl_texture_object *texObj,
+                                    struct gl_texture_image *texImage)  
+{
+}
+
+/*
+ * CompressedTexSubImage2D driver hook (installed by r700InitTextureFuncs).
+ * Not implemented yet: the body is empty, so the call does nothing.
+ */
+static void r700CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+                                       GLint level, GLint xoffset,
+                                       GLint yoffset, GLsizei width,
+                                       GLsizei height, GLenum format,
+                                       GLsizei imageSize, const GLvoid * data,
+                                       struct gl_texture_object *texObj,
+                                       struct gl_texture_image *texImage)  
+{
+}
+
+/* Placeholder: performs no work and always returns GL_TRUE. */
+static GLboolean r700UpdateTextureUnit(GLcontext * ctx, int unit)
+{
+    return GL_TRUE;
+}
+
+/* Placeholder for rectangle textures: performs no work and always returns GL_TRUE. */
+static GLboolean r700EnableTextureRect(GLcontext * ctx, int unit)
+{
+    return GL_TRUE;
+}
+
+/* Placeholder for 3D textures: performs no work and always returns GL_TRUE. */
+static GLboolean r700EnableTexture3D(GLcontext * ctx, int unit)
+{
+    return GL_TRUE;
+}
+
+/* Placeholder for cube-map textures: performs no work and always returns GL_TRUE. */
+static GLboolean r700EnableTextureCube(GLcontext * ctx, int unit)
+{
+    return GL_TRUE;
+}
+
+/*
+ * Point the chip context's texture and sampler state slots for `unit` at the
+ * state of the texture object currently bound there.  Always returns GL_TRUE.
+ * The body is currently compiled out.
+ */
+static GLboolean r700UpdateTexture(GLcontext * ctx, int unit)
+{
+#if 0 /* to be enabled */
+    context_t                *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT        *r700    = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+    struct gl_texture_object *tObj    = ctx->Texture.Unit[unit]._Current;
+    r700TexObjPtr             t       = (r700TexObjPtr) tObj->DriverData;
+
+    /* Already pointing at this object's state: nothing to rebind. */
+    if (r700->texture_states.textures[unit] == &(t->texture_state))
+    {
+        return GL_TRUE;
+    }
+
+    /* Any previously bound state is simply replaced. */
+    r700->texture_states.textures[unit] = &(t->texture_state);
+    r700->texture_states.samplers[unit] = &(t->sampler_state);
+    driUpdateTextureLRU((driTextureObject *) t);   /* XXX: should be locked! */
+#endif /* to be enabled */
+
+    return GL_TRUE;
+}
+
+/*
+ * Walk every texture unit, run the enable/update path that matches the
+ * unit's enabled target, and emit a Mesa warning for each unit that could
+ * not be set up.  The body is currently compiled out.
+ */
+void r700UpdateTextureState(context_t * context)
+{
+#if 0 /* to be enabled */
+    GLcontext *ctx = context->ctx;
+    GLuint     unit;
+
+    for (unit = 0; unit < R700_MAX_TEXTURE_UNITS; unit++)
+    {
+        const GLuint enabled = ctx->Texture.Unit[unit]._ReallyEnabled;
+        GLboolean    bRet;
+
+        /* Targets are tested in priority order: rect, 1D/2D, 3D, cube. */
+        if (enabled & (TEXTURE_RECT_BIT))
+        {
+            bRet = (r700EnableTextureRect(ctx, unit) &&
+                    r700UpdateTexture(ctx, unit));
+        }
+        else if (enabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT))
+        {
+            bRet = (r700EnableTexture2D(ctx, unit) &&
+                    r700UpdateTexture(ctx, unit));
+        }
+        else if (enabled & (TEXTURE_3D_BIT))
+        {
+            bRet = (r700EnableTexture3D(ctx, unit) &&
+                    r700UpdateTexture(ctx, unit));
+        }
+        else if (enabled & (TEXTURE_CUBE_BIT))
+        {
+            bRet = (r700EnableTextureCube(ctx, unit) &&
+                    r700UpdateTexture(ctx, unit));
+        }
+        else if (enabled)
+        {
+            /* Some other target is enabled that this driver does not handle. */
+            bRet = GL_FALSE;
+        }
+        else
+        {
+            /* Unit disabled: nothing to do. */
+            bRet = GL_TRUE;
+        }
+
+        if (!bRet)
+        {
+            /* unit is a GLuint, so print it with %u. */
+            _mesa_warning(ctx, "failed to update texture state for unit %u.\n", unit);
+        }
+    }
+#endif /* to be enabled */
+}
+
+/*
+ * Install this driver's texture hooks into the given device-driver function
+ * table, then call driInitTextureFormats().
+ */
+void r700InitTextureFuncs(struct dd_function_table *functions) 
+{
+       /* Note: we only plug in the functions we implement in the driver
+        * since _mesa_init_driver_functions() was already called.
+        */
+       functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+       functions->TexImage1D = r700TexImage1D;
+       functions->TexImage2D = r700TexImage2D;
+       functions->TexImage3D = r700TexImage3D;
+       functions->TexSubImage1D = r700TexSubImage1D;
+       functions->TexSubImage2D = r700TexSubImage2D;
+       functions->TexSubImage3D = r700TexSubImage3D;
+       functions->NewTextureObject = r700NewTextureObject;
+       functions->BindTexture = r700BindTexture;
+       functions->DeleteTexture = r700DeleteTexture;
+       functions->IsTextureResident = driIsTextureResident;
+
+       functions->TexParameter = r700TexParameter;
+
+       functions->CompressedTexImage2D = r700CompressedTexImage2D;
+       functions->CompressedTexSubImage2D = r700CompressedTexSubImage2D;
+
+       driInitTextureFormats();
+}
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_tex.h b/src/mesa/drivers/dri/r600/r700_tex.h
new file mode 100644 (file)
index 0000000..1329057
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef __r700_TEX_H__
+#define __r700_TEX_H__
+
+#include "texmem.h"
+
+#include "r700_chip.h"
+
+/* TODO : review this after texture load code. */
+#define R700_BLIT_WIDTH_BYTES 1024
+/* The BASE_ADDRESS and MIP_ADDRESS fields are 256-byte-aligned, so the
+ * alignment mask is the low eight bits: 256 - 1 = 0xff. */
+#define R700_TEXTURE_ALIGNMENT_MASK     0xff
+/* Texel pitch is aligned to a multiple of 8, so the mask is 8 - 1 = 0x7. */
+#define R700_TEXEL_PITCH_ALIGNMENT_MASK 0x7
+
+#define R700_MAX_TEXTURE_UNITS 8 /* TODO : should be 16, lets make it work, review later */
+
+/* r700TexObj is the struct itself; r700TexObjPtr is a pointer to it. */
+typedef struct r700_tex_obj r700TexObj, *r700TexObjPtr;
+
+/* Texture object in locally shared texture space.
+ *
+ * Layout: the common DRI texture object comes first, followed by fields
+ * carried over from the r300 texture object, followed by the r700
+ * texture/sampler state and per-image bookkeeping.
+ */
+struct r700_tex_obj 
+{
+       /* Common DRI texture object; kept as the first member. */
+       driTextureObject base;
+
+       /* r300 tex obj */
+       GLuint bufAddr; 
+       GLboolean image_override;
+       GLuint pitch;           
+       GLuint filter;
+       GLuint filter_1;
+       GLuint pitch_reg;
+       GLuint size;            
+       GLuint format;
+       GLuint offset;          
+       GLuint unknown4;
+       GLuint unknown5;
+       GLboolean border_fallback;
+       GLuint tile_bits;
+    
+    /* r700 texture states */
+    TEXTURE_STATE_STRUCT texture_state;
+    SAMPLER_STATE_STRUCT sampler_state;
+
+    /* Per-image tables, indexed [i][level] with i in 0..5.
+     * NOTE(review): the first dimension is presumably the cube face -
+     * confirm against the texture upload code. */
+    GLuint texel_pitch[6][RADEON_MAX_TEXTURE_LEVELS];
+    GLuint level_offset[6][RADEON_MAX_TEXTURE_LEVELS];
+    GLuint byte_per_texel;    
+    /* "pexel"/"hight" are the spellings used by the field names. */
+    GLuint src_width_in_pexel[6][RADEON_MAX_TEXTURE_LEVELS];
+    GLuint src_hight_in_pexel[6][RADEON_MAX_TEXTURE_LEVELS];
+
+    GLuint my_dirty_images[6]; /* TODO : review */
+};
+
+/* Bind the contents of a drawable to a texture target. */
+extern void r700SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
+                            __DRIdrawable *dPriv);
+
+/* Same as r700SetTexBuffer, with an explicit format argument. */
+extern void r700SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+                             GLint format, __DRIdrawable *dPriv);
+
+/* Point the texture named texname at an explicit offset/depth/pitch. */
+extern void r700SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+                            unsigned long long offset, GLint depth,
+                            GLuint pitch);
+
+extern GLuint r700GetTexObjSize(void);
+extern void r700UpdateTextureState(context_t * context);
+
+/* NOTE(review): rmesa is taken by value here while r700UpdateTextureState
+ * takes a context_t pointer - confirm this matches the definition. */
+extern void r700DestroyTexObj(context_t rmesa, r700TexObjPtr t);
+
+/* Install the r700 texture hooks into a Mesa driver function table. */
+extern void r700InitTextureFuncs(struct dd_function_table *functions);
+
+#endif /* __r700_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
new file mode 100644 (file)
index 0000000..22a12a0
--- /dev/null
@@ -0,0 +1,466 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "tnl/t_context.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+#include "r700_debug.h"
+#include "r700_vertprog.h"
+
+#if 0 /* to be enabled */
+#include "r700_emit.h"
+#endif
+
+/*
+ * Assign consecutive slots, starting at unStart, to every vertex result
+ * that mesa_vp writes, and record each slot in pAsm->ucVP_OutputMap.
+ *
+ * Returns the number of slots assigned.
+ *
+ * The assignment order (HPOS, COL0, COL1, then TEX0..TEX7) has to match
+ * the fragment shader input order.
+ *
+ * TODO : back-face colors (BFC0/BFC1), fog (FOGC) and point size (PSIZ)
+ * are not mapped yet.  The earlier draft wrote the BFC0/BFC1 slots into
+ * the COL0/COL1 map entries.
+ */
+unsigned int Map_Vertex_Output(r700_AssemblerBase       *pAsm, 
+                               struct gl_vertex_program *mesa_vp,
+                               unsigned int unStart)
+{
+    /* Results handled before the texture coordinates, in slot order. */
+    static const unsigned int fixedResults[] =
+    {
+        VERT_RESULT_HPOS,
+        VERT_RESULT_COL0,
+        VERT_RESULT_COL1
+    };
+    const unsigned int numFixed = sizeof(fixedResults) / sizeof(fixedResults[0]);
+
+    unsigned int nextSlot = unStart;
+    unsigned int resultBit;
+    unsigned int n;
+
+    for (n = 0; n < numFixed; n++)
+    {
+        resultBit = 1 << fixedResults[n];
+        if (mesa_vp->Base.OutputsWritten & resultBit)
+        {
+            pAsm->ucVP_OutputMap[fixedResults[n]] = nextSlot++;
+        }
+    }
+
+    /* Eight texture coordinate results follow the fixed ones. */
+    for (n = 0; n < 8; n++)
+    {
+        resultBit = 1 << (VERT_RESULT_TEX0 + n);
+        if (mesa_vp->Base.OutputsWritten & resultBit)
+        {
+            pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + n] = nextSlot++;
+        }
+    }
+
+    return (nextSlot - unStart);
+}
+
+/*
+ * Assign consecutive slots, starting at unStart, to every vertex
+ * attribute that mesa_vp reads, and record each slot in
+ * pAsm->ucVP_AttributeMap.
+ *
+ * Returns the number of slots assigned.
+ */
+unsigned int Map_Vertex_Input(r700_AssemblerBase       *pAsm, 
+                                         struct gl_vertex_program *mesa_vp,
+                                         unsigned int unStart)
+{
+    int i;
+       unsigned int unBit;
+       unsigned int unTotal = unStart;
+       for(i=0; i<VERT_ATTRIB_MAX; i++)
+       {
+               /* Unsigned literal: shifting a signed 1 into bit 31 is
+                * undefined, which would happen if i reaches 31. */
+               unBit = 1u << i;
+               if(mesa_vp->Base.InputsRead & unBit)
+               {
+                       pAsm->ucVP_AttributeMap[i] = unTotal++;
+               }
+       }
+       return (unTotal - unStart);
+}
+
+/*
+ * Emit one vertex-fetch instruction for every attribute that mesa_vp
+ * reads.
+ *
+ * Each fetch uses the slot already stored in ucVP_AttributeMap and the
+ * element count and type stored in vp->aos_desc, so both must be filled
+ * in before this is called.  All fetches share one VTX_FETCH_METHOD.
+ *
+ * Always returns GL_TRUE.
+ */
+GLboolean Process_Vertex_Program_Vfetch_Instructions(
+                                               struct r700_vertex_program *vp,
+                                               struct gl_vertex_program   *mesa_vp)
+{
+    int i;
+    unsigned int unBit;
+       VTX_FETCH_METHOD vtxFetchMethod;
+       vtxFetchMethod.bEnableMini          = GL_FALSE;
+       vtxFetchMethod.mega_fetch_remainder = 0;
+
+       for(i=0; i<VERT_ATTRIB_MAX; i++)
+       {
+               /* Unsigned literal: shifting a signed 1 into bit 31 is
+                * undefined, which would happen if i reaches 31. */
+               unBit = 1u << i;
+               if(mesa_vp->Base.InputsRead & unBit)
+               {
+                       assemble_vfetch_instruction(&vp->r700AsmCode,
+                                                               i,
+                                vp->r700AsmCode.ucVP_AttributeMap[i],
+                                                               vp->aos_desc[i].size,
+                                vp->aos_desc[i].type,
+                                                               &vtxFetchMethod);
+               }
+       }
+       
+       return GL_TRUE;
+}
+
+/*
+ * Lay out the register file for a vertex program.
+ *
+ * Registers are handed out in this order, each group starting where the
+ * previous one ended:
+ *   R0                - index into the vertex buffer
+ *   inputs            - one per attribute read (fetches are emitted here)
+ *   exports           - one per result written
+ *   temporaries       - the larger of NumNativeTemporaries/NumTemporaries
+ *   uFirstHelpReg     - first register past all of the above
+ *
+ * Also allocates pAsm->pucOutMask (one byte per export, cleared to 0).
+ *
+ * The function returns nothing: if the fetch step reports an error it
+ * logs through r700_error() and returns early, leaving the export and
+ * temporary fields unset.
+ */
+void Map_Vertex_Program(struct r700_vertex_program *vp,
+                                               struct gl_vertex_program   *mesa_vp)
+{
+    GLuint ui;
+    r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
+       unsigned int num_inputs;
+
+       // R0 will always be used for index into vertex buffer
+       pAsm->number_used_registers = 1;
+       pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
+
+    // Map Inputs: Add 1 to mapping since R0 is used for index
+       num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
+       pAsm->number_used_registers += num_inputs;
+
+       // Create VFETCH instructions for inputs
+       if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) 
+       {
+               r700_error(ERROR_ASM_VTX_CLAUSE, "Calling Process_Vertex_Program_Vfetch_Instructions return error. \n");
+               return; //error
+       }
+
+       // Map Outputs
+       pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
+
+       // Exports start at the register count reached before they are added.
+       pAsm->starting_export_register_number = pAsm->number_used_registers;
+
+       pAsm->number_used_registers += pAsm->number_of_exports;
+
+    // NOTE(review): the MALLOC result is not checked before the loop
+    // below writes through it.
+    pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
+    
+    for(ui=0; ui<pAsm->number_of_exports; ui++)
+    {
+        pAsm->pucOutMask[ui] = 0x0;
+    }
+
+    /* Map temporary registers (GPRs) */
+    pAsm->starting_temp_register_number = pAsm->number_used_registers;
+
+    /* Reserve whichever temporary count is larger. */
+    if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
+    {   /* arb uses NumNativeTemporaries */
+        pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
+    }
+    else
+    {   /* fix func t_vp uses NumTemporaries */
+        pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
+    }
+       
+    pAsm->uFirstHelpReg = pAsm->number_used_registers;
+}
+
+/*
+ * Build the per-instruction dependency table for a vertex program.
+ *
+ * For every instruction, each of its three source operands that reads a
+ * temporary is tagged with the index of the most recent instruction
+ * that wrote that temporary (-1 if none has, or if the operand is not a
+ * temporary).  nDstDep is initialised to -1.
+ *
+ * On success the table is stored in vp->r700AsmCode.pInstDeps and
+ * GL_TRUE is returned.  If either allocation fails, GL_FALSE is returned
+ * and vp->r700AsmCode.pInstDeps is left untouched.
+ */
+GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
+                                                               struct gl_vertex_program   *mesa_vp)
+{
+    GLuint i, j;
+    GLint * puiTEMPwrites;
+    struct prog_instruction *pILInst;
+    InstDeps         *pInstDeps;
+
+    /* Scratch table: last instruction that wrote each temporary. */
+    puiTEMPwrites = (GLint*) MALLOC(sizeof(GLint)*mesa_vp->Base.NumTemporaries);
+    if(NULL == puiTEMPwrites && mesa_vp->Base.NumTemporaries > 0)
+    {
+        /* A NULL result is only a failure for a non-empty request. */
+        return GL_FALSE;
+    }
+    for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
+    {
+        puiTEMPwrites[i] = -1;
+    }
+
+    pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
+    if(NULL == pInstDeps && mesa_vp->Base.NumInstructions > 0)
+    {
+        FREE(puiTEMPwrites);
+        return GL_FALSE;
+    }
+
+    for(i=0; i<mesa_vp->Base.NumInstructions; i++)
+    {
+        pInstDeps[i].nDstDep = -1;
+        pILInst = &(mesa_vp->Base.Instructions[i]);
+
+        //Dst
+        if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
+        {
+            //Set lastwrite for the temp
+            puiTEMPwrites[pILInst->DstReg.Index] = i;
+        }
+
+        //Src
+        for(j=0; j<3; j++)
+        {
+            if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
+            {
+                //Set dep.
+                pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+            }
+            else
+            {
+                pInstDeps[i].nSrcDeps[j] = -1;
+            }
+        }
+    }
+
+    vp->r700AsmCode.pInstDeps = pInstDeps;
+
+    FREE(puiTEMPwrites);
+
+    return GL_TRUE;
+}
+
+/*
+ * Translate a Mesa vertex program into r700 shader code.
+ *
+ * Steps, in order: initialise the assembler, map registers, compute
+ * instruction dependencies, assemble the instructions, then process the
+ * vertex exports.  Returns GL_FALSE as soon as one of the last three
+ * steps fails; otherwise fills in the shader's register and export
+ * counts, marks the program translated and returns GL_TRUE.
+ */
+GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp,
+                                                          struct gl_vertex_program   *mesa_vp)
+{
+    r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
+
+    Init_r700_AssemblerBase(SPT_VP, pAsm, &(vp->r700Shader) );
+    Map_Vertex_Program( vp, mesa_vp );
+
+    if(!Find_Instruction_Dependencies_vp(vp, mesa_vp))
+    {
+        return GL_FALSE;
+    }
+
+    if(!AssembleInstr(mesa_vp->Base.NumInstructions,
+                      &(mesa_vp->Base.Instructions[0]),
+                      pAsm))
+    {
+        return GL_FALSE;
+    }
+
+    if(!Process_Vertex_Exports(pAsm, mesa_vp->Base.OutputsWritten))
+    {
+        return GL_FALSE;
+    }
+
+    /* nRegs holds the register count minus one, floored at zero. */
+    if(pAsm->number_used_registers == 0)
+    {
+        vp->r700Shader.nRegs = 0;
+    }
+    else
+    {
+        vp->r700Shader.nRegs = pAsm->number_used_registers - 1;
+    }
+
+    vp->r700Shader.nParamExports = pAsm->number_of_exports;
+
+    vp->translated = GL_TRUE;
+
+    return GL_TRUE;
+}
+
+/*
+ * Prepare the current vertex program for drawing.
+ *
+ * The whole body is compiled out for now ("to be enabled").  When
+ * enabled it flags R6xx chips, describes every attribute the program
+ * reads as tightly packed floats taken from the TNL vertex buffer, and
+ * translates the program if that has not been done yet.
+ */
+void r700SelectVertexShader(GLcontext *ctx)
+{
+#if 0 /* to be enabled */
+    context_t *context = R700_CONTEXT(ctx);
+    struct r700_vertex_program *vpc
+             = (struct r700_vertex_program *)ctx->VertexProgram._Current;
+    if (context->screen->chip.type <= CHIP_TYPE_RV670)
+    {
+        vpc->r700AsmCode.bR6xx = 1;
+    }
+    
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+       struct vertex_buffer *vb = &tnl->vb;
+
+    unsigned int unBit;
+       unsigned int i;
+       for(i=0; i<VERT_ATTRIB_MAX; i++)
+       {
+               unBit = 1 << i;
+               if(vpc->mesa_program.Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */
+               {
+            vpc->aos_desc[i].size   = vb->AttribPtr[i]->size;
+            /* Element size must come from the GLfloat type; GL_FLOAT is
+             * an enum constant, so sizeof(GL_FLOAT) measures an int. */
+            vpc->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GLfloat);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/
+                       vpc->aos_desc[i].type   = GL_FLOAT;
+               }
+       }
+
+    if(GL_FALSE == vpc->translated)
+    {
+        /* NOTE(review): the translation result is not checked here. */
+        r700TranslateVertexShader(vpc,
+                                                         &(vpc->mesa_program) );
+    }
+#endif /* to be enabled */
+}
+
+/*
+ * Build the vertex-fetch constant words for one vertex stream.
+ *
+ *   nStreamID  - stream index, used only by the disabled emit code
+ *   aos_offset - stored unchanged in word 0
+ *   size       - number of elements in a vector
+ *   stride     - stored in word 2; count * stride - 1 goes in word 1
+ *   count      - number of vectors in the stream
+ *
+ * The data format is always derived from GL_FLOAT for now.  Writing the
+ * words to the command buffer is compiled out ("to be enabled"), so at
+ * present the words are computed and then discarded.
+ */
+void r700SetupVTXConstans(GLcontext  * ctx, 
+                          unsigned int nStreamID,
+                          unsigned int aos_offset,
+                          unsigned int size,      /* number of elements in vector */
+                          unsigned int stride,
+                          unsigned int count)     /* number of vectors in stream */
+{
+    context_t *context = R700_CONTEXT(ctx);
+    uint32_t *dest; /* presumably for the disabled emit macros - TODO confirm */
+
+    unsigned int uSQ_VTX_CONSTANT_WORD0_0 = aos_offset;
+    unsigned int uSQ_VTX_CONSTANT_WORD1_0 = count * stride - 1;
+    unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0;
+    unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0;
+    unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0;
+
+    /* Word 2: high address bits (TODO, zero for now), stride, format. */
+    uSQ_VTX_CONSTANT_WORD2_0 |= 0 << BASE_ADDRESS_HI_shift; /* TODO */
+    uSQ_VTX_CONSTANT_WORD2_0 |= stride << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift;
+    /* TODO : trace back api for initial data type, not only GL_FLOAT */
+    uSQ_VTX_CONSTANT_WORD2_0 |= GetSurfaceFormat(GL_FLOAT, size, NULL)
+                                << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift;
+    uSQ_VTX_CONSTANT_WORD2_0 |= SQ_NUM_FORMAT_SCALED
+                                << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift;
+    uSQ_VTX_CONSTANT_WORD2_0 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
+
+    uSQ_VTX_CONSTANT_WORD3_0 |= 1 << MEM_REQUEST_SIZE_shift;
+
+    uSQ_VTX_CONSTANT_WORD6_0 |= SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift;
+#if 0 /* to be enabled */
+    R700_CMDBUF_CHECK_SPACE(9);
+    R700EP3 (context, IT_SET_RESOURCE, 7);
+    R700E32 (context, (nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+    
+       R700E32 (context, uSQ_VTX_CONSTANT_WORD0_0);
+       R700E32 (context, uSQ_VTX_CONSTANT_WORD1_0);
+       R700E32 (context, uSQ_VTX_CONSTANT_WORD2_0);
+       R700E32 (context, uSQ_VTX_CONSTANT_WORD3_0);
+       R700E32 (context, 0);
+       R700E32 (context, 0);
+       R700E32 (context, uSQ_VTX_CONSTANT_WORD6_0);
+#endif /* to be enabled */
+}
+
+/*
+ * Load the current vertex program and program the vertex-shader state.
+ *
+ * On first use the shader is assembled (if it still needs it) and handed
+ * to the chip object's EmitShader hook; vp->loaded then prevents a
+ * repeat.  The shader binary is dumped on every call.  After that the
+ * program start address, GPR count, stack size and export counts are
+ * written into the chip context, and state parameters are refreshed.
+ * Uploading the constants themselves is compiled out ("to be enabled").
+ *
+ * Always returns GL_TRUE.
+ */
+GLboolean r700SetupVertexProgram(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
+
+    struct r700_vertex_program *vp
+             = (struct r700_vertex_program *)ctx->VertexProgram._Current;
+
+    struct gl_program_parameter_list *paramList;
+    unsigned int unNumParamData;
+
+    unsigned int ui;
+
+    if(GL_FALSE == vp->loaded)
+    {
+        if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
+           {
+                   Assemble( &(vp->r700Shader) );
+           }
+
+        /* Load vp to gpu */
+        (context->chipobj.EmitShader)(ctx, 
+                       &(vp->shadercode), 
+                       (GLvoid *)(vp->r700Shader.pProgram),
+                       vp->r700Shader.uShaderBinaryDWORDSize); 
+
+        vp->loaded = GL_TRUE;
+    }
+
+    DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
+                 vp->r700Shader.uShaderBinaryDWORDSize);
+
+    /* TODO : enable this after MemUse fixed
+    (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
+    */
+
+    /* Start address is stored in 256-byte units, limited to 24 bits. */
+    r700->SQ_PGM_START_VS.u32All            = (vp->shadercode.aos_offset >> 8) & 0x00FFFFFF;
+    
+    /* nRegs is the register count minus one, so add the one back. */
+    SETfield(r700->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
+             NUM_GPRS_shift, NUM_GPRS_mask);
+
+    if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
+       {
+        SETfield(r700->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
+                 STACK_SIZE_shift, STACK_SIZE_mask);
+    }
+
+    /* NOTE(review): nParamExports - 1 wraps if no exports were mapped. */
+    SETfield(r700->SPI_VS_OUT_CONFIG.u32All, vp->r700Shader.nParamExports - 1,
+             VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
+       SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
+             NUM_INTERP_shift, NUM_INTERP_mask);
+
+    /*
+    SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+    CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+    */
+
+    /* Send out shader constants. */
+
+    paramList = vp->mesa_program.Base.Parameters;
+
+    if(NULL != paramList)
+    {
+        _mesa_load_state_parameters(ctx, paramList);
+#if 0 /* to be enabled */
+        /* Four dwords per parameter. */
+        unNumParamData = paramList->NumParameters * 4;
+
+        R700_CMDBUF_CHECK_SPACE(unNumParamData + 2);
+        R700EP3 (context, IT_SET_ALU_CONST, unNumParamData);
+        /* assembler map const from very beginning. */
+        R700E32 (context, SQ_ALU_CONSTANT_VS_OFFSET * 4);
+
+        unNumParamData = paramList->NumParameters;
+
+        for(ui=0; ui<unNumParamData; ui++)
+        {
+            R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][0])));
+            R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][1])));
+            R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][2])));
+            R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][3])));
+        }
+#endif /* to be enabled */
+    }
+
+    return GL_TRUE;
+}
+
+
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h
new file mode 100644 (file)
index 0000000..0822335
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _R700_VERTPROG_H_
+#define _R700_VERTPROG_H_
+
+#include "main/glheader.h"
+#include "main/mtypes.h" 
+
+#include "r700_shader.h"
+#include "r700_assembler.h"
+
+/* Description of one vertex attribute array (temporary definition). */
+typedef struct ArrayDesc //TEMP
+{
+       GLint size;   //number of data elements per vertex
+       GLenum type;  //data element type, e.g. GL_FLOAT
+       GLsizei stride; //presumably bytes between consecutive vertices - TODO confirm
+} ArrayDesc;
+
+/*
+ * Driver-side vertex program: the Mesa program plus its r700 translation.
+ */
+struct r700_vertex_program 
+{
+    /* Must be first: the driver casts Mesa's current vertex program
+     * pointer directly to this struct. */
+    struct gl_vertex_program mesa_program; /* Must be first */
+
+       struct r700_vertex_program *next;
+
+    /* Assembler working state and the resulting shader. */
+    r700_AssemblerBase r700AsmCode;
+    R700_Shader        r700Shader;
+
+       /* Set once the program has been translated / handed to EmitShader. */
+       GLboolean translated;
+    GLboolean loaded;
+       
+       /* ... */
+
+    /* Region that receives the emitted shader binary. */
+    struct r600_dma_region shadercode;
+       /* One array description per vertex attribute. */
+       ArrayDesc              aos_desc[VERT_ATTRIB_MAX];
+};
+
+//Internal
+/* Map written vertex results to slots starting at unStart; returns the
+ * number of slots assigned. */
+unsigned int Map_Vertex_Output(r700_AssemblerBase       *pAsm, 
+                                                  struct gl_vertex_program *mesa_vp,
+                                                  unsigned int unStart);
+/* Map read vertex attributes to slots starting at unStart; returns the
+ * number of slots assigned. */
+unsigned int Map_Vertex_Input(r700_AssemblerBase       *pAsm, 
+                                                 struct gl_vertex_program *mesa_vp,
+                                                 unsigned int unStart);
+/* Emit one vertex-fetch instruction per attribute read. */
+GLboolean Process_Vertex_Program_Vfetch_Instructions(
+                                               struct r700_vertex_program *vp,
+                                               struct gl_vertex_program   *mesa_vp);
+/* Lay out input, export and temporary registers for the program. */
+void Map_Vertex_Program(struct r700_vertex_program *vp,
+                                               struct gl_vertex_program   *mesa_vp);
+/* Build the per-instruction temporary-register dependency table. */
+GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
+                                                                  struct gl_vertex_program   *mesa_vp);
+
+/* Interface */
+/* Translate mesa_vp into r700 shader code; GL_FALSE on failure. */
+extern GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp,
+                                                          struct gl_vertex_program   *mesa_vp);
+
+extern void r700SelectVertexShader(GLcontext *ctx);
+
+/* Load the current vertex program and program the vertex-shader state. */
+extern GLboolean r700SetupVertexProgram(GLcontext * ctx);
+
+/* The last parameter is spelled "count" in the definition. */
+extern void      r700SetupVTXConstans(GLcontext  * ctx, 
+                                      unsigned int nStreamID,
+                                      unsigned int aos_offset,
+                                      unsigned int size,      /* number of elements in vector */
+                                      unsigned int stride,
+                                      unsigned int Count);    /* number of vectors in stream */
+
+#endif /* _R700_VERTPROG_H_ */
diff --git a/src/mesa/drivers/dri/r600/sq_micro_reg.h b/src/mesa/drivers/dri/r600/sq_micro_reg.h
new file mode 100644 (file)
index 0000000..bfd21ce
--- /dev/null
@@ -0,0 +1,2008 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Contacts:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#if !defined (_SQ_MICRO_REG_H)
+#define _SQ_MICRO_REG_H
+
+#if defined(LITTLEENDIAN_CPU)
+#elif defined(BIGENDIAN_CPU)
+#else
+#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
+#endif
+
+/*
+ * SQ_ALU_SRC_GPR_BASE value
+ *
+ * The SQ_ALU_SRC_* constants below come in BASE/SIZE pairs.
+ * NOTE(review): presumably these are ALU source-select ranges - confirm
+ * against the hardware documentation.
+ */
+
+#define SQ_ALU_SRC_GPR_BASE            0x00000000
+
+/*
+ * SQ_ALU_SRC_GPR_SIZE value
+ */
+
+#define SQ_ALU_SRC_GPR_SIZE            0x00000080
+
+/*
+ * SQ_ALU_SRC_KCACHE0_BASE value (equals GPR_BASE + GPR_SIZE)
+ */
+
+#define SQ_ALU_SRC_KCACHE0_BASE        0x00000080
+
+/*
+ * SQ_ALU_SRC_KCACHE0_SIZE value
+ */
+
+#define SQ_ALU_SRC_KCACHE0_SIZE        0x00000020
+
+/*
+ * SQ_ALU_SRC_KCACHE1_BASE value (equals KCACHE0_BASE + KCACHE0_SIZE)
+ */
+
+#define SQ_ALU_SRC_KCACHE1_BASE        0x000000a0
+
+/*
+ * SQ_ALU_SRC_KCACHE1_SIZE value
+ */
+
+#define SQ_ALU_SRC_KCACHE1_SIZE        0x00000020
+
+/*
+ * SQ_ALU_SRC_CFILE_BASE value
+ */
+
+#define SQ_ALU_SRC_CFILE_BASE          0x00000100
+
+/*
+ * SQ_ALU_SRC_CFILE_SIZE value
+ */
+
+#define SQ_ALU_SRC_CFILE_SIZE          0x00000100
+
+/*
+ * SQ_SP_OP_REDUC_BEGIN value
+ *
+ * The SQ_SP_OP_* constants below come in BEGIN/END pairs.
+ * NOTE(review): presumably inclusive opcode ranges - confirm.
+ */
+
+#define SQ_SP_OP_REDUC_BEGIN           0x00000050
+
+/*
+ * SQ_SP_OP_REDUC_END value
+ */
+
+#define SQ_SP_OP_REDUC_END             0x00000053
+
+/*
+ * SQ_SP_OP_TRANS_BEGIN value
+ */
+
+#define SQ_SP_OP_TRANS_BEGIN           0x00000060
+
+/*
+ * SQ_SP_OP_TRANS_END value
+ */
+
+#define SQ_SP_OP_TRANS_END             0x0000007f
+
+/*
+ * SQ_CF_WORD0 struct
+ *
+ * A single 32-bit ADDR field that fills the whole word.  The SIZE,
+ * SHIFT and MASK constants describe the field; the GET/SET macros read
+ * and replace it in a plain unsigned int, and sq_cf_word0_u overlays the
+ * raw value with a bit-field view.
+ */
+
+#define SQ_CF_WORD0_ADDR_SIZE          32
+
+#define SQ_CF_WORD0_ADDR_SHIFT         0
+
+#define SQ_CF_WORD0_ADDR_MASK          0xffffffff
+
+#define SQ_CF_WORD0_MASK \
+     (SQ_CF_WORD0_ADDR_MASK)
+
+#define SQ_CF_WORD0_DEFAULT            0xcdcdcdcd
+
+/* Arguments are parenthesized so that expression arguments such as
+ * "a | b" are evaluated as a unit. */
+#define SQ_CF_WORD0_GET_ADDR(sq_cf_word0) \
+     (((sq_cf_word0) & SQ_CF_WORD0_ADDR_MASK) >> SQ_CF_WORD0_ADDR_SHIFT)
+
+/* The new value is shifted into place but not masked to the field. */
+#define SQ_CF_WORD0_SET_ADDR(sq_cf_word0_reg, addr) \
+     ((sq_cf_word0_reg) = ((sq_cf_word0_reg) & ~SQ_CF_WORD0_ADDR_MASK) | ((addr) << SQ_CF_WORD0_ADDR_SHIFT))
+
+/* With a single full-width field both endian layouts are identical. */
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_word0_t {
+          unsigned int addr                           : SQ_CF_WORD0_ADDR_SIZE;
+     } sq_cf_word0_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_cf_word0_t {
+          unsigned int addr                           : SQ_CF_WORD0_ADDR_SIZE;
+     } sq_cf_word0_t;
+
+#endif
+
+/* Raw 32-bit value (val) overlaid with the field view (f). */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_word0_t f;
+} sq_cf_word0_u;
+
+
+/*
+ * SQ_CF_WORD1 struct
+ *
+ * For every field there is a SIZE (width in bits), a SHIFT (position of
+ * the lowest bit) and a MASK (the field's bits within the word), plus a
+ * GET macro that extracts the field and a SET macro that replaces it.
+ * Bit 20 belongs to no field.
+ */
+
+#define SQ_CF_WORD1_POP_COUNT_SIZE     3
+#define SQ_CF_WORD1_CF_CONST_SIZE      5
+#define SQ_CF_WORD1_COND_SIZE          2
+#define SQ_CF_WORD1_COUNT_SIZE         3
+#define SQ_CF_WORD1_CALL_COUNT_SIZE    6
+#define SQ_CF_WORD1_COUNT_3_SIZE       1
+#define SQ_CF_WORD1_END_OF_PROGRAM_SIZE 1
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE 1
+#define SQ_CF_WORD1_CF_INST_SIZE       7
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_WORD1_BARRIER_SIZE       1
+
+#define SQ_CF_WORD1_POP_COUNT_SHIFT    0
+#define SQ_CF_WORD1_CF_CONST_SHIFT     3
+#define SQ_CF_WORD1_COND_SHIFT         8
+#define SQ_CF_WORD1_COUNT_SHIFT        10
+#define SQ_CF_WORD1_CALL_COUNT_SHIFT   13
+#define SQ_CF_WORD1_COUNT_3_SHIFT      19
+#define SQ_CF_WORD1_END_OF_PROGRAM_SHIFT 21
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT 22
+#define SQ_CF_WORD1_CF_INST_SHIFT      23
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_WORD1_BARRIER_SHIFT      31
+
+#define SQ_CF_WORD1_POP_COUNT_MASK     0x00000007
+#define SQ_CF_WORD1_CF_CONST_MASK      0x000000f8
+#define SQ_CF_WORD1_COND_MASK          0x00000300
+#define SQ_CF_WORD1_COUNT_MASK         0x00001c00
+#define SQ_CF_WORD1_CALL_COUNT_MASK    0x0007e000
+#define SQ_CF_WORD1_COUNT_3_MASK       0x00080000
+#define SQ_CF_WORD1_END_OF_PROGRAM_MASK 0x00200000
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_MASK 0x00400000
+#define SQ_CF_WORD1_CF_INST_MASK       0x3f800000
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_WORD1_BARRIER_MASK       0x80000000
+
+#define SQ_CF_WORD1_MASK \
+     (SQ_CF_WORD1_POP_COUNT_MASK | \
+      SQ_CF_WORD1_CF_CONST_MASK | \
+      SQ_CF_WORD1_COND_MASK | \
+      SQ_CF_WORD1_COUNT_MASK | \
+      SQ_CF_WORD1_CALL_COUNT_MASK | \
+      SQ_CF_WORD1_COUNT_3_MASK | \
+      SQ_CF_WORD1_END_OF_PROGRAM_MASK | \
+      SQ_CF_WORD1_VALID_PIXEL_MODE_MASK | \
+      SQ_CF_WORD1_CF_INST_MASK | \
+      SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK | \
+      SQ_CF_WORD1_BARRIER_MASK)
+
+#define SQ_CF_WORD1_DEFAULT            0xcdcdcdcd
+
+/* Field readers.  The argument is parenthesized so that an expression
+ * argument such as "w | x" is masked as a unit. */
+#define SQ_CF_WORD1_GET_POP_COUNT(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_POP_COUNT_MASK) >> SQ_CF_WORD1_POP_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_CF_CONST(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_CF_CONST_MASK) >> SQ_CF_WORD1_CF_CONST_SHIFT)
+#define SQ_CF_WORD1_GET_COND(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_COND_MASK) >> SQ_CF_WORD1_COND_SHIFT)
+#define SQ_CF_WORD1_GET_COUNT(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_COUNT_MASK) >> SQ_CF_WORD1_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_CALL_COUNT(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_CALL_COUNT_MASK) >> SQ_CF_WORD1_CALL_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_COUNT_3(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_COUNT_3_MASK) >> SQ_CF_WORD1_COUNT_3_SHIFT)
+#define SQ_CF_WORD1_GET_END_OF_PROGRAM(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_END_OF_PROGRAM_MASK) >> SQ_CF_WORD1_END_OF_PROGRAM_SHIFT)
+#define SQ_CF_WORD1_GET_VALID_PIXEL_MODE(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_VALID_PIXEL_MODE_MASK) >> SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT)
+#define SQ_CF_WORD1_GET_CF_INST(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_CF_INST_MASK) >> SQ_CF_WORD1_CF_INST_SHIFT)
+#define SQ_CF_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_WORD1_GET_BARRIER(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_BARRIER_MASK) >> SQ_CF_WORD1_BARRIER_SHIFT)
+
+/* Field writers.  Both arguments are parenthesized so that an expression
+ * value such as "a | b" is shifted as a unit.  The new value is shifted
+ * into place but not masked to the field width. */
+#define SQ_CF_WORD1_SET_POP_COUNT(sq_cf_word1_reg, pop_count) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_POP_COUNT_MASK) | ((pop_count) << SQ_CF_WORD1_POP_COUNT_SHIFT))
+#define SQ_CF_WORD1_SET_CF_CONST(sq_cf_word1_reg, cf_const) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_CF_CONST_MASK) | ((cf_const) << SQ_CF_WORD1_CF_CONST_SHIFT))
+#define SQ_CF_WORD1_SET_COND(sq_cf_word1_reg, cond) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_COND_MASK) | ((cond) << SQ_CF_WORD1_COND_SHIFT))
+#define SQ_CF_WORD1_SET_COUNT(sq_cf_word1_reg, count) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_COUNT_MASK) | ((count) << SQ_CF_WORD1_COUNT_SHIFT))
+#define SQ_CF_WORD1_SET_CALL_COUNT(sq_cf_word1_reg, call_count) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_CALL_COUNT_MASK) | ((call_count) << SQ_CF_WORD1_CALL_COUNT_SHIFT))
+#define SQ_CF_WORD1_SET_COUNT_3(sq_cf_word1_reg, count_3) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_COUNT_3_MASK) | ((count_3) << SQ_CF_WORD1_COUNT_3_SHIFT))
+#define SQ_CF_WORD1_SET_END_OF_PROGRAM(sq_cf_word1_reg, end_of_program) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_END_OF_PROGRAM_MASK) | ((end_of_program) << SQ_CF_WORD1_END_OF_PROGRAM_SHIFT))
+#define SQ_CF_WORD1_SET_VALID_PIXEL_MODE(sq_cf_word1_reg, valid_pixel_mode) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_VALID_PIXEL_MODE_MASK) | ((valid_pixel_mode) << SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT))
+#define SQ_CF_WORD1_SET_CF_INST(sq_cf_word1_reg, cf_inst) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_CF_INST_MASK) | ((cf_inst) << SQ_CF_WORD1_CF_INST_SHIFT))
+#define SQ_CF_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_word1_reg, whole_quad_mode) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK) | ((whole_quad_mode) << SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT))
+#define SQ_CF_WORD1_SET_BARRIER(sq_cf_word1_reg, barrier) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_BARRIER_MASK) | ((barrier) << SQ_CF_WORD1_BARRIER_SHIFT))
+
+/*
+ * Bitfield view of SQ_CF_WORD1.  The BIGENDIAN_CPU variant declares the same
+ * fields as the LITTLEENDIAN_CPU variant, in the opposite order.
+ * NOTE(review): presumably this keeps every field on the same bits of the
+ * word on both kinds of host; bitfield allocation order is
+ * implementation-defined, so confirm for the target compiler.
+ */
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_word1_t {
+          unsigned int pop_count                      : SQ_CF_WORD1_POP_COUNT_SIZE;
+          unsigned int cf_const                       : SQ_CF_WORD1_CF_CONST_SIZE;
+          unsigned int cond                           : SQ_CF_WORD1_COND_SIZE;
+          unsigned int count                          : SQ_CF_WORD1_COUNT_SIZE;
+          unsigned int call_count                     : SQ_CF_WORD1_CALL_COUNT_SIZE;
+          unsigned int count_3                        : SQ_CF_WORD1_COUNT_3_SIZE;
+          unsigned int                                : 1;   /* unnamed padding bit */
+          unsigned int end_of_program                 : SQ_CF_WORD1_END_OF_PROGRAM_SIZE;
+          unsigned int valid_pixel_mode               : SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE;
+          unsigned int cf_inst                        : SQ_CF_WORD1_CF_INST_SIZE;
+          unsigned int whole_quad_mode                : SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE;
+          unsigned int barrier                        : SQ_CF_WORD1_BARRIER_SIZE;
+     } sq_cf_word1_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_cf_word1_t {
+          unsigned int barrier                        : SQ_CF_WORD1_BARRIER_SIZE;
+          unsigned int whole_quad_mode                : SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE;
+          unsigned int cf_inst                        : SQ_CF_WORD1_CF_INST_SIZE;
+          unsigned int valid_pixel_mode               : SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE;
+          unsigned int end_of_program                 : SQ_CF_WORD1_END_OF_PROGRAM_SIZE;
+          unsigned int                                : 1;   /* unnamed padding bit */
+          unsigned int count_3                        : SQ_CF_WORD1_COUNT_3_SIZE;
+          unsigned int call_count                     : SQ_CF_WORD1_CALL_COUNT_SIZE;
+          unsigned int count                          : SQ_CF_WORD1_COUNT_SIZE;
+          unsigned int cond                           : SQ_CF_WORD1_COND_SIZE;
+          unsigned int cf_const                       : SQ_CF_WORD1_CF_CONST_SIZE;
+          unsigned int pop_count                      : SQ_CF_WORD1_POP_COUNT_SIZE;
+     } sq_cf_word1_t;
+
+#else
+
+     /* Without this the union below fails with an unrelated "unknown type" error. */
+#error "sq_cf_word1_t: define LITTLEENDIAN_CPU or BIGENDIAN_CPU"
+
+#endif
+
+/* Overlays the raw 32-bit word (val) with its per-field view (f). */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_word1_t f;
+} sq_cf_word1_u;
+
+
+/*
+ * SQ_CF_ALU_WORD0 struct
+ */
+
+/* Width of each field in bits; used as the bitfield widths of sq_cf_alu_word0_t. */
+#define SQ_CF_ALU_WORD0_ADDR_SIZE      22
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE 4
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE 4
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE 2
+
+/* Bit position of each field's least significant bit within the word. */
+#define SQ_CF_ALU_WORD0_ADDR_SHIFT     0
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT 22
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT 26
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT 30
+
+/* Bits of the word occupied by each field (SIZE ones starting at SHIFT). */
+#define SQ_CF_ALU_WORD0_ADDR_MASK      0x003fffff
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK 0x03c00000
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK 0x3c000000
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK 0xc0000000
+
+/* OR of every field mask; the four fields together cover all 32 bits. */
+#define SQ_CF_ALU_WORD0_MASK \
+     (SQ_CF_ALU_WORD0_ADDR_MASK | \
+      SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK | \
+      SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK | \
+      SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK)
+
+/* NOTE(review): presumably a recognizable "uninitialized" fill pattern - confirm. */
+#define SQ_CF_ALU_WORD0_DEFAULT        0xcdcdcdcd
+
+/*
+ * SQ_CF_ALU_WORD0 field readers: AND the raw word with the field's MASK, then
+ * shift right by the field's SHIFT so the field is returned right-aligned.
+ * The argument is parenthesized so an expression such as (a | b) is masked
+ * as a whole.
+ */
+#define SQ_CF_ALU_WORD0_GET_ADDR(sq_cf_alu_word0) \
+     (((sq_cf_alu_word0) & SQ_CF_ALU_WORD0_ADDR_MASK) >> SQ_CF_ALU_WORD0_ADDR_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_BANK0(sq_cf_alu_word0) \
+     (((sq_cf_alu_word0) & SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK) >> SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_BANK1(sq_cf_alu_word0) \
+     (((sq_cf_alu_word0) & SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK) >> SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_MODE0(sq_cf_alu_word0) \
+     (((sq_cf_alu_word0) & SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) >> SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT)
+
+/*
+ * SQ_CF_ALU_WORD0 field writers: clear the field's bits in the lvalue
+ * sq_cf_alu_word0_reg, then OR in the new value shifted into position.
+ * The value is converted to unsigned before shifting (no signed-shift
+ * overflow at bit 31) and masked to the field so it cannot disturb other
+ * fields.  Both parameters are parenthesized; the register is evaluated twice.
+ */
+#define SQ_CF_ALU_WORD0_SET_ADDR(sq_cf_alu_word0_reg, addr) \
+     ((sq_cf_alu_word0_reg) = ((sq_cf_alu_word0_reg) & ~SQ_CF_ALU_WORD0_ADDR_MASK) | \
+      (((unsigned int)(addr) << SQ_CF_ALU_WORD0_ADDR_SHIFT) & SQ_CF_ALU_WORD0_ADDR_MASK))
+#define SQ_CF_ALU_WORD0_SET_KCACHE_BANK0(sq_cf_alu_word0_reg, kcache_bank0) \
+     ((sq_cf_alu_word0_reg) = ((sq_cf_alu_word0_reg) & ~SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK) | \
+      (((unsigned int)(kcache_bank0) << SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT) & SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK))
+#define SQ_CF_ALU_WORD0_SET_KCACHE_BANK1(sq_cf_alu_word0_reg, kcache_bank1) \
+     ((sq_cf_alu_word0_reg) = ((sq_cf_alu_word0_reg) & ~SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK) | \
+      (((unsigned int)(kcache_bank1) << SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT) & SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK))
+#define SQ_CF_ALU_WORD0_SET_KCACHE_MODE0(sq_cf_alu_word0_reg, kcache_mode0) \
+     ((sq_cf_alu_word0_reg) = ((sq_cf_alu_word0_reg) & ~SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) | \
+      (((unsigned int)(kcache_mode0) << SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT) & SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK))
+
+/*
+ * Bitfield view of SQ_CF_ALU_WORD0.  The LITTLEENDIAN_CPU variant declares
+ * the fields in order of increasing SHIFT; the BIGENDIAN_CPU variant declares
+ * them in the opposite order.
+ * NOTE(review): presumably this keeps every field on the bits named by its
+ * MASK on both kinds of host; bitfield allocation order is
+ * implementation-defined, so confirm for the target compiler.
+ */
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_alu_word0_t {
+          unsigned int addr                           : SQ_CF_ALU_WORD0_ADDR_SIZE;
+          unsigned int kcache_bank0                   : SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE;
+          unsigned int kcache_bank1                   : SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE;
+          unsigned int kcache_mode0                   : SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE;
+     } sq_cf_alu_word0_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_cf_alu_word0_t {
+          unsigned int kcache_mode0                   : SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE;
+          unsigned int kcache_bank1                   : SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE;
+          unsigned int kcache_bank0                   : SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE;
+          unsigned int addr                           : SQ_CF_ALU_WORD0_ADDR_SIZE;
+     } sq_cf_alu_word0_t;
+
+#else
+
+     /* Without this the union below fails with an unrelated "unknown type" error. */
+#error "sq_cf_alu_word0_t: define LITTLEENDIAN_CPU or BIGENDIAN_CPU"
+
+#endif
+
+/* Overlays the raw 32-bit word (val) with its per-field view (f). */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_alu_word0_t f;
+} sq_cf_alu_word0_u;
+
+
+/*
+ * SQ_CF_ALU_WORD1 struct
+ */
+
+/* Width of each field in bits; used as the bitfield widths of sq_cf_alu_word1_t. */
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE 2
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE 8
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE 8
+#define SQ_CF_ALU_WORD1_COUNT_SIZE     7
+#define SQ_CF_ALU_WORD1_ALT_CONST_SIZE 1
+#define SQ_CF_ALU_WORD1_CF_INST_SIZE   4
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_ALU_WORD1_BARRIER_SIZE   1
+
+/* Bit position of each field's least significant bit within the word. */
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT 0
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT 2
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT 10
+#define SQ_CF_ALU_WORD1_COUNT_SHIFT    18
+#define SQ_CF_ALU_WORD1_ALT_CONST_SHIFT 25
+#define SQ_CF_ALU_WORD1_CF_INST_SHIFT  26
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_ALU_WORD1_BARRIER_SHIFT  31
+
+/* Bits of the word occupied by each field (SIZE ones starting at SHIFT). */
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK 0x00000003
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK 0x000003fc
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK 0x0003fc00
+#define SQ_CF_ALU_WORD1_COUNT_MASK     0x01fc0000
+#define SQ_CF_ALU_WORD1_ALT_CONST_MASK 0x02000000
+#define SQ_CF_ALU_WORD1_CF_INST_MASK   0x3c000000
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_ALU_WORD1_BARRIER_MASK   0x80000000
+
+/* OR of every field mask; the eight fields together cover all 32 bits. */
+#define SQ_CF_ALU_WORD1_MASK \
+     (SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK | \
+      SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK | \
+      SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK | \
+      SQ_CF_ALU_WORD1_COUNT_MASK | \
+      SQ_CF_ALU_WORD1_ALT_CONST_MASK | \
+      SQ_CF_ALU_WORD1_CF_INST_MASK | \
+      SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK | \
+      SQ_CF_ALU_WORD1_BARRIER_MASK)
+
+/* NOTE(review): presumably a recognizable "uninitialized" fill pattern - confirm. */
+#define SQ_CF_ALU_WORD1_DEFAULT        0xcdcdcdcd
+
+/*
+ * SQ_CF_ALU_WORD1 field readers: AND the raw word with the field's MASK, then
+ * shift right by the field's SHIFT so the field is returned right-aligned.
+ * The argument is parenthesized so an expression such as (a | b) is masked
+ * as a whole.
+ */
+#define SQ_CF_ALU_WORD1_GET_KCACHE_MODE1(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK) >> SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_KCACHE_ADDR0(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK) >> SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_KCACHE_ADDR1(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK) >> SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_COUNT(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_COUNT_MASK) >> SQ_CF_ALU_WORD1_COUNT_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_ALT_CONST(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_ALT_CONST_MASK) >> SQ_CF_ALU_WORD1_ALT_CONST_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_CF_INST(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_CF_INST_MASK) >> SQ_CF_ALU_WORD1_CF_INST_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_BARRIER(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_BARRIER_MASK) >> SQ_CF_ALU_WORD1_BARRIER_SHIFT)
+
+/*
+ * SQ_CF_ALU_WORD1 field writers: clear the field's bits in the lvalue
+ * sq_cf_alu_word1_reg, then OR in the new value shifted into position.
+ * The value is converted to unsigned before shifting (no signed-shift
+ * overflow at bit 31) and masked to the field so it cannot disturb other
+ * fields.  Both parameters are parenthesized; the register is evaluated twice.
+ */
+#define SQ_CF_ALU_WORD1_SET_KCACHE_MODE1(sq_cf_alu_word1_reg, kcache_mode1) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK) | \
+      (((unsigned int)(kcache_mode1) << SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT) & SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK))
+#define SQ_CF_ALU_WORD1_SET_KCACHE_ADDR0(sq_cf_alu_word1_reg, kcache_addr0) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK) | \
+      (((unsigned int)(kcache_addr0) << SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT) & SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK))
+#define SQ_CF_ALU_WORD1_SET_KCACHE_ADDR1(sq_cf_alu_word1_reg, kcache_addr1) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK) | \
+      (((unsigned int)(kcache_addr1) << SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT) & SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK))
+#define SQ_CF_ALU_WORD1_SET_COUNT(sq_cf_alu_word1_reg, count) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_COUNT_MASK) | \
+      (((unsigned int)(count) << SQ_CF_ALU_WORD1_COUNT_SHIFT) & SQ_CF_ALU_WORD1_COUNT_MASK))
+#define SQ_CF_ALU_WORD1_SET_ALT_CONST(sq_cf_alu_word1_reg, alt_const) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_ALT_CONST_MASK) | \
+      (((unsigned int)(alt_const) << SQ_CF_ALU_WORD1_ALT_CONST_SHIFT) & SQ_CF_ALU_WORD1_ALT_CONST_MASK))
+#define SQ_CF_ALU_WORD1_SET_CF_INST(sq_cf_alu_word1_reg, cf_inst) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_CF_INST_MASK) | \
+      (((unsigned int)(cf_inst) << SQ_CF_ALU_WORD1_CF_INST_SHIFT) & SQ_CF_ALU_WORD1_CF_INST_MASK))
+#define SQ_CF_ALU_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_alu_word1_reg, whole_quad_mode) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK) | \
+      (((unsigned int)(whole_quad_mode) << SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT) & SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK))
+#define SQ_CF_ALU_WORD1_SET_BARRIER(sq_cf_alu_word1_reg, barrier) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_BARRIER_MASK) | \
+      (((unsigned int)(barrier) << SQ_CF_ALU_WORD1_BARRIER_SHIFT) & SQ_CF_ALU_WORD1_BARRIER_MASK))
+
+/*
+ * Bitfield view of SQ_CF_ALU_WORD1.  The LITTLEENDIAN_CPU variant declares
+ * the fields in order of increasing SHIFT; the BIGENDIAN_CPU variant declares
+ * them in the opposite order.
+ * NOTE(review): presumably this keeps every field on the bits named by its
+ * MASK on both kinds of host; bitfield allocation order is
+ * implementation-defined, so confirm for the target compiler.
+ */
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_alu_word1_t {
+          unsigned int kcache_mode1                   : SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE;
+          unsigned int kcache_addr0                   : SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE;
+          unsigned int kcache_addr1                   : SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE;
+          unsigned int count                          : SQ_CF_ALU_WORD1_COUNT_SIZE;
+          unsigned int alt_const                      : SQ_CF_ALU_WORD1_ALT_CONST_SIZE;
+          unsigned int cf_inst                        : SQ_CF_ALU_WORD1_CF_INST_SIZE;
+          unsigned int whole_quad_mode                : SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE;
+          unsigned int barrier                        : SQ_CF_ALU_WORD1_BARRIER_SIZE;
+     } sq_cf_alu_word1_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_cf_alu_word1_t {
+          unsigned int barrier                        : SQ_CF_ALU_WORD1_BARRIER_SIZE;
+          unsigned int whole_quad_mode                : SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE;
+          unsigned int cf_inst                        : SQ_CF_ALU_WORD1_CF_INST_SIZE;
+          unsigned int alt_const                      : SQ_CF_ALU_WORD1_ALT_CONST_SIZE;
+          unsigned int count                          : SQ_CF_ALU_WORD1_COUNT_SIZE;
+          unsigned int kcache_addr1                   : SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE;
+          unsigned int kcache_addr0                   : SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE;
+          unsigned int kcache_mode1                   : SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE;
+     } sq_cf_alu_word1_t;
+
+#else
+
+     /* Without this the union below fails with an unrelated "unknown type" error. */
+#error "sq_cf_alu_word1_t: define LITTLEENDIAN_CPU or BIGENDIAN_CPU"
+
+#endif
+
+/* Overlays the raw 32-bit word (val) with its per-field view (f). */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_alu_word1_t f;
+} sq_cf_alu_word1_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD0 struct
+ */
+
+/* Width of each field in bits; used as the bitfield widths of sq_cf_alloc_export_word0_t. */
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE 13
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE 2
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE 2
+
+/* Bit position of each field's least significant bit within the word. */
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT 13
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT 15
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT 22
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT 23
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT 30
+
+/* Bits of the word occupied by each field (SIZE ones starting at SHIFT). */
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK 0x00001fff
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK 0x00006000
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK 0x003f8000
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK 0x00400000
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK 0x3f800000
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK 0xc0000000
+
+/* OR of every field mask; the six fields together cover all 32 bits. */
+#define SQ_CF_ALLOC_EXPORT_WORD0_MASK \
+     (SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK)
+
+/* NOTE(review): presumably a recognizable "uninitialized" fill pattern - confirm. */
+#define SQ_CF_ALLOC_EXPORT_WORD0_DEFAULT 0xcdcdcdcd
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD0 field readers: AND the raw word with the field's
+ * MASK, then shift right by the field's SHIFT so the field is returned
+ * right-aligned.  The argument is parenthesized so an expression such as
+ * (a | b) is masked as a whole.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_ARRAY_BASE(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_TYPE(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_RW_GPR(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_RW_REL(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_INDEX_GPR(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_ELEM_SIZE(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT)
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD0 field writers: clear the field's bits in the
+ * lvalue sq_cf_alloc_export_word0_reg, then OR in the new value shifted into
+ * position.  The value is converted to unsigned before shifting (no
+ * signed-shift overflow at bit 31) and masked to the field so it cannot
+ * disturb other fields.  Both parameters are parenthesized; the register is
+ * evaluated twice.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_ARRAY_BASE(sq_cf_alloc_export_word0_reg, array_base) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK) | \
+      (((unsigned int)(array_base) << SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_TYPE(sq_cf_alloc_export_word0_reg, type) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK) | \
+      (((unsigned int)(type) << SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_RW_GPR(sq_cf_alloc_export_word0_reg, rw_gpr) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK) | \
+      (((unsigned int)(rw_gpr) << SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_RW_REL(sq_cf_alloc_export_word0_reg, rw_rel) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK) | \
+      (((unsigned int)(rw_rel) << SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_INDEX_GPR(sq_cf_alloc_export_word0_reg, index_gpr) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK) | \
+      (((unsigned int)(index_gpr) << SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_ELEM_SIZE(sq_cf_alloc_export_word0_reg, elem_size) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) | \
+      (((unsigned int)(elem_size) << SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK))
+
+/*
+ * Bitfield view of SQ_CF_ALLOC_EXPORT_WORD0.  The LITTLEENDIAN_CPU variant
+ * declares the fields in order of increasing SHIFT; the BIGENDIAN_CPU variant
+ * declares them in the opposite order.
+ * NOTE(review): presumably this keeps every field on the bits named by its
+ * MASK on both kinds of host; bitfield allocation order is
+ * implementation-defined, so confirm for the target compiler.
+ */
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_alloc_export_word0_t {
+          unsigned int array_base                     : SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE;
+          unsigned int type                           : SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE;
+          unsigned int rw_gpr                         : SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE;
+          unsigned int rw_rel                         : SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE;
+          unsigned int index_gpr                      : SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE;
+          unsigned int elem_size                      : SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE;
+     } sq_cf_alloc_export_word0_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_cf_alloc_export_word0_t {
+          unsigned int elem_size                      : SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE;
+          unsigned int index_gpr                      : SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE;
+          unsigned int rw_rel                         : SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE;
+          unsigned int rw_gpr                         : SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE;
+          unsigned int type                           : SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE;
+          unsigned int array_base                     : SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE;
+     } sq_cf_alloc_export_word0_t;
+
+#else
+
+     /* Without this the union below fails with an unrelated "unknown type" error. */
+#error "sq_cf_alloc_export_word0_t: define LITTLEENDIAN_CPU or BIGENDIAN_CPU"
+
+#endif
+
+/* Overlays the raw 32-bit word (val) with its per-field view (f). */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_alloc_export_word0_t f;
+} sq_cf_alloc_export_word0_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1 struct
+ */
+
+/* Width of each field in bits; used as the bitfield widths of sq_cf_alloc_export_word1_t. */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE 4
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE 1
+
+/*
+ * Bit position of each field's least significant bit.  The lowest field
+ * starts at bit 17; bits 0-16 are not described by this group of macros.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT 17
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT 21
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT 22
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT 23
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT 31
+
+/* Bits of the word occupied by each field (SIZE ones starting at SHIFT). */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK 0x001e0000
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK 0x00200000
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK 0x00400000
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK 0x3f800000
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK 0x80000000
+
+/* OR of every field mask: bits 17-31 (0xfffe0000). */
+#define SQ_CF_ALLOC_EXPORT_WORD1_MASK \
+     (SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK)
+
+/* Equals 0xcdcdcdcd restricted to the bits of SQ_CF_ALLOC_EXPORT_WORD1_MASK. */
+#define SQ_CF_ALLOC_EXPORT_WORD1_DEFAULT 0xcdcc0000
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1 field readers: AND the raw word with the field's
+ * MASK, then shift right by the field's SHIFT so the field is returned
+ * right-aligned.  The argument is parenthesized so an expression such as
+ * (a | b) is masked as a whole.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_BURST_COUNT(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_END_OF_PROGRAM(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_VALID_PIXEL_MODE(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_CF_INST(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_BARRIER(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT)
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1 field writers: clear the field's bits in the
+ * lvalue sq_cf_alloc_export_word1_reg, then OR in the new value shifted into
+ * position.  The value is converted to unsigned before shifting (no
+ * signed-shift overflow at bit 31) and masked to the field so it cannot
+ * disturb other fields.  Both parameters are parenthesized; the register is
+ * evaluated twice.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_BURST_COUNT(sq_cf_alloc_export_word1_reg, burst_count) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK) | \
+      (((unsigned int)(burst_count) << SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_END_OF_PROGRAM(sq_cf_alloc_export_word1_reg, end_of_program) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK) | \
+      (((unsigned int)(end_of_program) << SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_VALID_PIXEL_MODE(sq_cf_alloc_export_word1_reg, valid_pixel_mode) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK) | \
+      (((unsigned int)(valid_pixel_mode) << SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_CF_INST(sq_cf_alloc_export_word1_reg, cf_inst) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK) | \
+      (((unsigned int)(cf_inst) << SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_alloc_export_word1_reg, whole_quad_mode) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK) | \
+      (((unsigned int)(whole_quad_mode) << SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_BARRIER(sq_cf_alloc_export_word1_reg, barrier) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) | \
+      (((unsigned int)(barrier) << SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK))
+
+/*
+ * Bitfield view of SQ_CF_ALLOC_EXPORT_WORD1.  A 17-bit unnamed member stands
+ * in for bits 0-16, which this view does not name.  The LITTLEENDIAN_CPU
+ * variant declares the members in order of increasing SHIFT; the
+ * BIGENDIAN_CPU variant declares them in the opposite order.
+ * NOTE(review): presumably this keeps every field on the bits named by its
+ * MASK on both kinds of host; bitfield allocation order is
+ * implementation-defined, so confirm for the target compiler.
+ */
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_alloc_export_word1_t {
+          unsigned int                                : 17;
+          unsigned int burst_count                    : SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE;
+          unsigned int end_of_program                 : SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE;
+          unsigned int valid_pixel_mode               : SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE;
+          unsigned int cf_inst                        : SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE;
+          unsigned int whole_quad_mode                : SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE;
+          unsigned int barrier                        : SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE;
+     } sq_cf_alloc_export_word1_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_cf_alloc_export_word1_t {
+          unsigned int barrier                        : SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE;
+          unsigned int whole_quad_mode                : SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE;
+          unsigned int cf_inst                        : SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE;
+          unsigned int valid_pixel_mode               : SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE;
+          unsigned int end_of_program                 : SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE;
+          unsigned int burst_count                    : SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE;
+          unsigned int                                : 17;
+     } sq_cf_alloc_export_word1_t;
+
+#else
+
+     /* Without this the union below fails with an unrelated "unknown type" error. */
+#error "sq_cf_alloc_export_word1_t: define LITTLEENDIAN_CPU or BIGENDIAN_CPU"
+
+#endif
+
+/* Overlays the raw 32-bit word (val) with its per-field view (f). */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_alloc_export_word1_t f;
+} sq_cf_alloc_export_word1_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_BUF struct
+ */
+
+/* Width of each field in bits; used as the bitfield widths of sq_cf_alloc_export_word1_buf_t. */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE 12
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE 4
+
+/* Bit position of each field's least significant bit within the word. */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT 12
+
+/* Bits of the word occupied by each field (SIZE ones starting at SHIFT). */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK 0x00000fff
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK 0x0000f000
+
+/* OR of both field masks: bits 0-15 (0x0000ffff). */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_MASK \
+     (SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK)
+
+/* Equals 0xcdcdcdcd restricted to the bits of SQ_CF_ALLOC_EXPORT_WORD1_BUF_MASK. */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_DEFAULT 0x0000cdcd
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_BUF field readers: AND the raw word with the
+ * field's MASK, then shift right by the field's SHIFT so the field is
+ * returned right-aligned.  The argument is parenthesized so an expression
+ * such as (a | b) is masked as a whole.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_GET_ARRAY_SIZE(sq_cf_alloc_export_word1_buf) \
+     (((sq_cf_alloc_export_word1_buf) & SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_GET_COMP_MASK(sq_cf_alloc_export_word1_buf) \
+     (((sq_cf_alloc_export_word1_buf) & SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT)
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_BUF field writers: clear the field's bits in the
+ * lvalue sq_cf_alloc_export_word1_buf_reg, then OR in the new value shifted
+ * into position.  The value is converted to unsigned and masked to the field
+ * so it cannot disturb other bits.  Both parameters are parenthesized; the
+ * register is evaluated twice.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_SET_ARRAY_SIZE(sq_cf_alloc_export_word1_buf_reg, array_size) \
+     ((sq_cf_alloc_export_word1_buf_reg) = ((sq_cf_alloc_export_word1_buf_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK) | \
+      (((unsigned int)(array_size) << SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_SET_COMP_MASK(sq_cf_alloc_export_word1_buf_reg, comp_mask) \
+     ((sq_cf_alloc_export_word1_buf_reg) = ((sq_cf_alloc_export_word1_buf_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) | \
+      (((unsigned int)(comp_mask) << SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK))
+
+/*
+ * Bitfield view of SQ_CF_ALLOC_EXPORT_WORD1_BUF.  A 16-bit unnamed member
+ * stands in for bits 16-31, which this view does not name.  The
+ * LITTLEENDIAN_CPU variant declares the members in order of increasing SHIFT;
+ * the BIGENDIAN_CPU variant declares them in the opposite order.
+ * NOTE(review): presumably this keeps every field on the bits named by its
+ * MASK on both kinds of host; bitfield allocation order is
+ * implementation-defined, so confirm for the target compiler.
+ */
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_alloc_export_word1_buf_t {
+          unsigned int array_size                     : SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE;
+          unsigned int comp_mask                      : SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE;
+          unsigned int                                : 16;
+     } sq_cf_alloc_export_word1_buf_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_cf_alloc_export_word1_buf_t {
+          unsigned int                                : 16;
+          unsigned int comp_mask                      : SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE;
+          unsigned int array_size                     : SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE;
+     } sq_cf_alloc_export_word1_buf_t;
+
+#else
+
+     /* Without this the union below fails with an unrelated "unknown type" error. */
+#error "sq_cf_alloc_export_word1_buf_t: define LITTLEENDIAN_CPU or BIGENDIAN_CPU"
+
+#endif
+
+/* Overlays the raw 32-bit word (val) with its per-field view (f). */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_alloc_export_word1_buf_t f;
+} sq_cf_alloc_export_word1_buf_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_SWIZ struct
+ */
+
+/* Width of each field in bits; used as the bitfield widths of sq_cf_alloc_export_word1_swiz_t. */
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE 3
+
+/* Bit position of each field's least significant bit within the word. */
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT 6
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT 9
+
+/* Bits of the word occupied by each field (SIZE ones starting at SHIFT). */
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK 0x00000007
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK 0x00000038
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK 0x000001c0
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK 0x00000e00
+
+/* OR of all four field masks: bits 0-11 (0x00000fff). */
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_MASK \
+     (SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK)
+
+/* Equals 0xcdcdcdcd restricted to the bits of SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_MASK. */
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_DEFAULT 0x00000dcd
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_SWIZ field readers: AND the raw word with the
+ * field's MASK, then shift right by the field's SHIFT so the field is
+ * returned right-aligned.  The argument is parenthesized so an expression
+ * such as (a | b) is masked as a whole.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_X(sq_cf_alloc_export_word1_swiz) \
+     (((sq_cf_alloc_export_word1_swiz) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_Y(sq_cf_alloc_export_word1_swiz) \
+     (((sq_cf_alloc_export_word1_swiz) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_Z(sq_cf_alloc_export_word1_swiz) \
+     (((sq_cf_alloc_export_word1_swiz) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_W(sq_cf_alloc_export_word1_swiz) \
+     (((sq_cf_alloc_export_word1_swiz) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT)
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_SWIZ field writers: clear the field's bits in the
+ * lvalue sq_cf_alloc_export_word1_swiz_reg, then OR in the new value shifted
+ * into position.  The value is converted to unsigned and masked to the field
+ * so it cannot disturb other bits.  Both parameters are parenthesized; the
+ * register is evaluated twice.
+ */
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_X(sq_cf_alloc_export_word1_swiz_reg, sel_x) \
+     ((sq_cf_alloc_export_word1_swiz_reg) = ((sq_cf_alloc_export_word1_swiz_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK) | \
+      (((unsigned int)(sel_x) << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_Y(sq_cf_alloc_export_word1_swiz_reg, sel_y) \
+     ((sq_cf_alloc_export_word1_swiz_reg) = ((sq_cf_alloc_export_word1_swiz_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK) | \
+      (((unsigned int)(sel_y) << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_Z(sq_cf_alloc_export_word1_swiz_reg, sel_z) \
+     ((sq_cf_alloc_export_word1_swiz_reg) = ((sq_cf_alloc_export_word1_swiz_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK) | \
+      (((unsigned int)(sel_z) << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_W(sq_cf_alloc_export_word1_swiz_reg, sel_w) \
+     ((sq_cf_alloc_export_word1_swiz_reg) = ((sq_cf_alloc_export_word1_swiz_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) | \
+      (((unsigned int)(sel_w) << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK))
+
+/*
+ * Bitfield view of SQ_CF_ALLOC_EXPORT_WORD1_SWIZ.  A 20-bit unnamed member
+ * stands in for bits 12-31, which this view does not name.  The
+ * LITTLEENDIAN_CPU variant declares the members in order of increasing SHIFT;
+ * the BIGENDIAN_CPU variant declares them in the opposite order.
+ * NOTE(review): presumably this keeps every field on the bits named by its
+ * MASK on both kinds of host; bitfield allocation order is
+ * implementation-defined, so confirm for the target compiler.
+ */
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_alloc_export_word1_swiz_t {
+          unsigned int sel_x                          : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE;
+          unsigned int sel_y                          : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE;
+          unsigned int sel_z                          : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE;
+          unsigned int sel_w                          : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE;
+          unsigned int                                : 20;
+     } sq_cf_alloc_export_word1_swiz_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_cf_alloc_export_word1_swiz_t {
+          unsigned int                                : 20;
+          unsigned int sel_w                          : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE;
+          unsigned int sel_z                          : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE;
+          unsigned int sel_y                          : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE;
+          unsigned int sel_x                          : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE;
+     } sq_cf_alloc_export_word1_swiz_t;
+
+#else
+
+     /* Without this the union below fails with an unrelated "unknown type" error. */
+#error "sq_cf_alloc_export_word1_swiz_t: define LITTLEENDIAN_CPU or BIGENDIAN_CPU"
+
+#endif
+
+/* Overlays the raw 32-bit word (val) with its per-field view (f). */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_alloc_export_word1_swiz_t f;
+} sq_cf_alloc_export_word1_swiz_u;
+
+
+/*
+ * SQ_ALU_WORD0 struct
+ */
+
+/* Width of each SQ_ALU_WORD0 field in bits; the widths sum to 32. */
+#define SQ_ALU_WORD0_SRC0_SEL_SIZE     9
+#define SQ_ALU_WORD0_SRC0_REL_SIZE     1
+#define SQ_ALU_WORD0_SRC0_CHAN_SIZE    2
+#define SQ_ALU_WORD0_SRC0_NEG_SIZE     1
+#define SQ_ALU_WORD0_SRC1_SEL_SIZE     9
+#define SQ_ALU_WORD0_SRC1_REL_SIZE     1
+#define SQ_ALU_WORD0_SRC1_CHAN_SIZE    2
+#define SQ_ALU_WORD0_SRC1_NEG_SIZE     1
+#define SQ_ALU_WORD0_INDEX_MODE_SIZE   3
+#define SQ_ALU_WORD0_PRED_SEL_SIZE     2
+#define SQ_ALU_WORD0_LAST_SIZE         1
+
+/* Bit position of each field's least significant bit within the word. */
+#define SQ_ALU_WORD0_SRC0_SEL_SHIFT    0
+#define SQ_ALU_WORD0_SRC0_REL_SHIFT    9
+#define SQ_ALU_WORD0_SRC0_CHAN_SHIFT   10
+#define SQ_ALU_WORD0_SRC0_NEG_SHIFT    12
+#define SQ_ALU_WORD0_SRC1_SEL_SHIFT    13
+#define SQ_ALU_WORD0_SRC1_REL_SHIFT    22
+#define SQ_ALU_WORD0_SRC1_CHAN_SHIFT   23
+#define SQ_ALU_WORD0_SRC1_NEG_SHIFT    25
+#define SQ_ALU_WORD0_INDEX_MODE_SHIFT  26
+#define SQ_ALU_WORD0_PRED_SEL_SHIFT    29
+#define SQ_ALU_WORD0_LAST_SHIFT        31
+
+/* Bits of the word occupied by each field (SIZE ones starting at SHIFT). */
+#define SQ_ALU_WORD0_SRC0_SEL_MASK     0x000001ff
+#define SQ_ALU_WORD0_SRC0_REL_MASK     0x00000200
+#define SQ_ALU_WORD0_SRC0_CHAN_MASK    0x00000c00
+#define SQ_ALU_WORD0_SRC0_NEG_MASK     0x00001000
+#define SQ_ALU_WORD0_SRC1_SEL_MASK     0x003fe000
+#define SQ_ALU_WORD0_SRC1_REL_MASK     0x00400000
+#define SQ_ALU_WORD0_SRC1_CHAN_MASK    0x01800000
+#define SQ_ALU_WORD0_SRC1_NEG_MASK     0x02000000
+#define SQ_ALU_WORD0_INDEX_MODE_MASK   0x1c000000
+#define SQ_ALU_WORD0_PRED_SEL_MASK     0x60000000
+#define SQ_ALU_WORD0_LAST_MASK         0x80000000
+
+/* OR of every field mask; the eleven fields together cover all 32 bits. */
+#define SQ_ALU_WORD0_MASK \
+     (SQ_ALU_WORD0_SRC0_SEL_MASK | \
+      SQ_ALU_WORD0_SRC0_REL_MASK | \
+      SQ_ALU_WORD0_SRC0_CHAN_MASK | \
+      SQ_ALU_WORD0_SRC0_NEG_MASK | \
+      SQ_ALU_WORD0_SRC1_SEL_MASK | \
+      SQ_ALU_WORD0_SRC1_REL_MASK | \
+      SQ_ALU_WORD0_SRC1_CHAN_MASK | \
+      SQ_ALU_WORD0_SRC1_NEG_MASK | \
+      SQ_ALU_WORD0_INDEX_MODE_MASK | \
+      SQ_ALU_WORD0_PRED_SEL_MASK | \
+      SQ_ALU_WORD0_LAST_MASK)
+
+/* NOTE(review): presumably a recognizable "uninitialized" fill pattern - confirm. */
+#define SQ_ALU_WORD0_DEFAULT           0xcdcdcdcd
+
+/*
+ * Field getters: isolate one field of a SQ_ALU_WORD0 value with its mask and
+ * shift it down to bit 0.  The argument is parenthesized so that an
+ * expression such as (a | b) is masked as a whole.
+ */
+#define SQ_ALU_WORD0_GET_SRC0_SEL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC0_SEL_MASK) >> SQ_ALU_WORD0_SRC0_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_REL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC0_REL_MASK) >> SQ_ALU_WORD0_SRC0_REL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_CHAN(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC0_CHAN_MASK) >> SQ_ALU_WORD0_SRC0_CHAN_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_NEG(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC0_NEG_MASK) >> SQ_ALU_WORD0_SRC0_NEG_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_SEL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC1_SEL_MASK) >> SQ_ALU_WORD0_SRC1_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_REL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC1_REL_MASK) >> SQ_ALU_WORD0_SRC1_REL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_CHAN(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC1_CHAN_MASK) >> SQ_ALU_WORD0_SRC1_CHAN_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_NEG(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC1_NEG_MASK) >> SQ_ALU_WORD0_SRC1_NEG_SHIFT)
+#define SQ_ALU_WORD0_GET_INDEX_MODE(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_INDEX_MODE_MASK) >> SQ_ALU_WORD0_INDEX_MODE_SHIFT)
+#define SQ_ALU_WORD0_GET_PRED_SEL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_PRED_SEL_MASK) >> SQ_ALU_WORD0_PRED_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_LAST(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_LAST_MASK) >> SQ_ALU_WORD0_LAST_SHIFT)
+
+/*
+ * Field setters: clear the field in the register lvalue, then OR in the new
+ * value.  Both arguments are parenthesized.  The value is converted to
+ * unsigned before shifting (a signed 1 << 31, as SET_LAST would otherwise
+ * produce, is undefined) and clipped to the field mask so it cannot spill
+ * into neighbouring fields.  The register argument is evaluated twice.
+ */
+#define SQ_ALU_WORD0_SET_SRC0_SEL(sq_alu_word0_reg, src0_sel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC0_SEL_MASK) | (((unsigned int)(src0_sel) << SQ_ALU_WORD0_SRC0_SEL_SHIFT) & SQ_ALU_WORD0_SRC0_SEL_MASK))
+#define SQ_ALU_WORD0_SET_SRC0_REL(sq_alu_word0_reg, src0_rel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC0_REL_MASK) | (((unsigned int)(src0_rel) << SQ_ALU_WORD0_SRC0_REL_SHIFT) & SQ_ALU_WORD0_SRC0_REL_MASK))
+#define SQ_ALU_WORD0_SET_SRC0_CHAN(sq_alu_word0_reg, src0_chan) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC0_CHAN_MASK) | (((unsigned int)(src0_chan) << SQ_ALU_WORD0_SRC0_CHAN_SHIFT) & SQ_ALU_WORD0_SRC0_CHAN_MASK))
+#define SQ_ALU_WORD0_SET_SRC0_NEG(sq_alu_word0_reg, src0_neg) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC0_NEG_MASK) | (((unsigned int)(src0_neg) << SQ_ALU_WORD0_SRC0_NEG_SHIFT) & SQ_ALU_WORD0_SRC0_NEG_MASK))
+#define SQ_ALU_WORD0_SET_SRC1_SEL(sq_alu_word0_reg, src1_sel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC1_SEL_MASK) | (((unsigned int)(src1_sel) << SQ_ALU_WORD0_SRC1_SEL_SHIFT) & SQ_ALU_WORD0_SRC1_SEL_MASK))
+#define SQ_ALU_WORD0_SET_SRC1_REL(sq_alu_word0_reg, src1_rel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC1_REL_MASK) | (((unsigned int)(src1_rel) << SQ_ALU_WORD0_SRC1_REL_SHIFT) & SQ_ALU_WORD0_SRC1_REL_MASK))
+#define SQ_ALU_WORD0_SET_SRC1_CHAN(sq_alu_word0_reg, src1_chan) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC1_CHAN_MASK) | (((unsigned int)(src1_chan) << SQ_ALU_WORD0_SRC1_CHAN_SHIFT) & SQ_ALU_WORD0_SRC1_CHAN_MASK))
+#define SQ_ALU_WORD0_SET_SRC1_NEG(sq_alu_word0_reg, src1_neg) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC1_NEG_MASK) | (((unsigned int)(src1_neg) << SQ_ALU_WORD0_SRC1_NEG_SHIFT) & SQ_ALU_WORD0_SRC1_NEG_MASK))
+#define SQ_ALU_WORD0_SET_INDEX_MODE(sq_alu_word0_reg, index_mode) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_INDEX_MODE_MASK) | (((unsigned int)(index_mode) << SQ_ALU_WORD0_INDEX_MODE_SHIFT) & SQ_ALU_WORD0_INDEX_MODE_MASK))
+#define SQ_ALU_WORD0_SET_PRED_SEL(sq_alu_word0_reg, pred_sel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_PRED_SEL_MASK) | (((unsigned int)(pred_sel) << SQ_ALU_WORD0_PRED_SEL_SHIFT) & SQ_ALU_WORD0_PRED_SEL_MASK))
+#define SQ_ALU_WORD0_SET_LAST(sq_alu_word0_reg, last) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_LAST_MASK) | (((unsigned int)(last) << SQ_ALU_WORD0_LAST_SHIFT) & SQ_ALU_WORD0_LAST_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_alu_word0_t {
+          unsigned int src0_sel                       : SQ_ALU_WORD0_SRC0_SEL_SIZE;
+          unsigned int src0_rel                       : SQ_ALU_WORD0_SRC0_REL_SIZE;
+          unsigned int src0_chan                      : SQ_ALU_WORD0_SRC0_CHAN_SIZE;
+          unsigned int src0_neg                       : SQ_ALU_WORD0_SRC0_NEG_SIZE;
+          unsigned int src1_sel                       : SQ_ALU_WORD0_SRC1_SEL_SIZE;
+          unsigned int src1_rel                       : SQ_ALU_WORD0_SRC1_REL_SIZE;
+          unsigned int src1_chan                      : SQ_ALU_WORD0_SRC1_CHAN_SIZE;
+          unsigned int src1_neg                       : SQ_ALU_WORD0_SRC1_NEG_SIZE;
+          unsigned int index_mode                     : SQ_ALU_WORD0_INDEX_MODE_SIZE;
+          unsigned int pred_sel                       : SQ_ALU_WORD0_PRED_SEL_SIZE;
+          unsigned int last                           : SQ_ALU_WORD0_LAST_SIZE;
+     } sq_alu_word0_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_alu_word0_t {
+          unsigned int last                           : SQ_ALU_WORD0_LAST_SIZE;
+          unsigned int pred_sel                       : SQ_ALU_WORD0_PRED_SEL_SIZE;
+          unsigned int index_mode                     : SQ_ALU_WORD0_INDEX_MODE_SIZE;
+          unsigned int src1_neg                       : SQ_ALU_WORD0_SRC1_NEG_SIZE;
+          unsigned int src1_chan                      : SQ_ALU_WORD0_SRC1_CHAN_SIZE;
+          unsigned int src1_rel                       : SQ_ALU_WORD0_SRC1_REL_SIZE;
+          unsigned int src1_sel                       : SQ_ALU_WORD0_SRC1_SEL_SIZE;
+          unsigned int src0_neg                       : SQ_ALU_WORD0_SRC0_NEG_SIZE;
+          unsigned int src0_chan                      : SQ_ALU_WORD0_SRC0_CHAN_SIZE;
+          unsigned int src0_rel                       : SQ_ALU_WORD0_SRC0_REL_SIZE;
+          unsigned int src0_sel                       : SQ_ALU_WORD0_SRC0_SEL_SIZE;
+     } sq_alu_word0_t;
+
+#endif
+
+typedef union {               /* one SQ_ALU_WORD0 word, two overlapping views */
+     unsigned int val : 32;   /* the word as a single 32-bit value */
+     sq_alu_word0_t f;        /* the same storage as named bit-fields */
+} sq_alu_word0_u;
+
+
+/*
+ * SQ_ALU_WORD1 struct
+ *
+ * Layout constants for the fields of SQ_ALU_WORD1 defined here.  For each
+ * field: _SIZE is its width in bits, _SHIFT the position of its least
+ * significant bit, _MASK the bits it occupies.
+ * These six fields cover bits 15..31 only (SQ_ALU_WORD1_MASK evaluates to
+ * 0xffff8000); bits 0..14 have no field in this group.
+ * NOTE(review): the SQ_ALU_WORD1_OP2_V2 and SQ_ALU_WORD1_OP3 groups later in
+ * this file define fields in bits 0..17, so their ALU_INST masks overlap
+ * ENCODING (bits 15..17) - confirm that this is intended.
+ */
+
+#define SQ_ALU_WORD1_ENCODING_SIZE     3
+#define SQ_ALU_WORD1_BANK_SWIZZLE_SIZE 3
+#define SQ_ALU_WORD1_DST_GPR_SIZE      7
+#define SQ_ALU_WORD1_DST_REL_SIZE      1
+#define SQ_ALU_WORD1_DST_CHAN_SIZE     2
+#define SQ_ALU_WORD1_CLAMP_SIZE        1
+
+#define SQ_ALU_WORD1_ENCODING_SHIFT    15
+#define SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT 18
+#define SQ_ALU_WORD1_DST_GPR_SHIFT     21
+#define SQ_ALU_WORD1_DST_REL_SHIFT     28
+#define SQ_ALU_WORD1_DST_CHAN_SHIFT    29
+#define SQ_ALU_WORD1_CLAMP_SHIFT       31
+
+#define SQ_ALU_WORD1_ENCODING_MASK     0x00038000
+#define SQ_ALU_WORD1_BANK_SWIZZLE_MASK 0x001c0000
+#define SQ_ALU_WORD1_DST_GPR_MASK      0x0fe00000
+#define SQ_ALU_WORD1_DST_REL_MASK      0x10000000
+#define SQ_ALU_WORD1_DST_CHAN_MASK     0x60000000
+#define SQ_ALU_WORD1_CLAMP_MASK        0x80000000
+
+#define SQ_ALU_WORD1_MASK \
+     (SQ_ALU_WORD1_ENCODING_MASK | \
+      SQ_ALU_WORD1_BANK_SWIZZLE_MASK | \
+      SQ_ALU_WORD1_DST_GPR_MASK | \
+      SQ_ALU_WORD1_DST_REL_MASK | \
+      SQ_ALU_WORD1_DST_CHAN_MASK | \
+      SQ_ALU_WORD1_CLAMP_MASK)
+
+#define SQ_ALU_WORD1_DEFAULT           0xcdcd8000 /* equals 0xcdcdcdcd & SQ_ALU_WORD1_MASK; its use is not shown in this part of the file */
+
+/*
+ * Field getters: isolate one field of a SQ_ALU_WORD1 value with its mask and
+ * shift it down to bit 0.  The argument is parenthesized so that an
+ * expression such as (a | b) is masked as a whole.
+ */
+#define SQ_ALU_WORD1_GET_ENCODING(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_ENCODING_MASK) >> SQ_ALU_WORD1_ENCODING_SHIFT)
+#define SQ_ALU_WORD1_GET_BANK_SWIZZLE(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_BANK_SWIZZLE_MASK) >> SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_GPR(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_DST_GPR_MASK) >> SQ_ALU_WORD1_DST_GPR_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_REL(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_DST_REL_MASK) >> SQ_ALU_WORD1_DST_REL_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_CHAN(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_DST_CHAN_MASK) >> SQ_ALU_WORD1_DST_CHAN_SHIFT)
+#define SQ_ALU_WORD1_GET_CLAMP(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_CLAMP_MASK) >> SQ_ALU_WORD1_CLAMP_SHIFT)
+
+/*
+ * Field setters: clear the field in the register lvalue, then OR in the new
+ * value.  Both arguments are parenthesized.  The value is converted to
+ * unsigned before shifting (a signed 1 << 31, as SET_CLAMP would otherwise
+ * produce, is undefined) and clipped to the field mask so it cannot spill
+ * into neighbouring fields.  The register argument is evaluated twice.
+ */
+#define SQ_ALU_WORD1_SET_ENCODING(sq_alu_word1_reg, encoding) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_ENCODING_MASK) | (((unsigned int)(encoding) << SQ_ALU_WORD1_ENCODING_SHIFT) & SQ_ALU_WORD1_ENCODING_MASK))
+#define SQ_ALU_WORD1_SET_BANK_SWIZZLE(sq_alu_word1_reg, bank_swizzle) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_BANK_SWIZZLE_MASK) | (((unsigned int)(bank_swizzle) << SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT) & SQ_ALU_WORD1_BANK_SWIZZLE_MASK))
+#define SQ_ALU_WORD1_SET_DST_GPR(sq_alu_word1_reg, dst_gpr) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_DST_GPR_MASK) | (((unsigned int)(dst_gpr) << SQ_ALU_WORD1_DST_GPR_SHIFT) & SQ_ALU_WORD1_DST_GPR_MASK))
+#define SQ_ALU_WORD1_SET_DST_REL(sq_alu_word1_reg, dst_rel) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_DST_REL_MASK) | (((unsigned int)(dst_rel) << SQ_ALU_WORD1_DST_REL_SHIFT) & SQ_ALU_WORD1_DST_REL_MASK))
+#define SQ_ALU_WORD1_SET_DST_CHAN(sq_alu_word1_reg, dst_chan) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_DST_CHAN_MASK) | (((unsigned int)(dst_chan) << SQ_ALU_WORD1_DST_CHAN_SHIFT) & SQ_ALU_WORD1_DST_CHAN_MASK))
+#define SQ_ALU_WORD1_SET_CLAMP(sq_alu_word1_reg, clamp) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_CLAMP_MASK) | (((unsigned int)(clamp) << SQ_ALU_WORD1_CLAMP_SHIFT) & SQ_ALU_WORD1_CLAMP_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_t {
+          unsigned int                                : 15;
+          unsigned int encoding                       : SQ_ALU_WORD1_ENCODING_SIZE;
+          unsigned int bank_swizzle                   : SQ_ALU_WORD1_BANK_SWIZZLE_SIZE;
+          unsigned int dst_gpr                        : SQ_ALU_WORD1_DST_GPR_SIZE;
+          unsigned int dst_rel                        : SQ_ALU_WORD1_DST_REL_SIZE;
+          unsigned int dst_chan                       : SQ_ALU_WORD1_DST_CHAN_SIZE;
+          unsigned int clamp                          : SQ_ALU_WORD1_CLAMP_SIZE;
+     } sq_alu_word1_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_t {
+          unsigned int clamp                          : SQ_ALU_WORD1_CLAMP_SIZE;
+          unsigned int dst_chan                       : SQ_ALU_WORD1_DST_CHAN_SIZE;
+          unsigned int dst_rel                        : SQ_ALU_WORD1_DST_REL_SIZE;
+          unsigned int dst_gpr                        : SQ_ALU_WORD1_DST_GPR_SIZE;
+          unsigned int bank_swizzle                   : SQ_ALU_WORD1_BANK_SWIZZLE_SIZE;
+          unsigned int encoding                       : SQ_ALU_WORD1_ENCODING_SIZE;
+          unsigned int                                : 15;
+     } sq_alu_word1_t;
+
+#endif
+
+typedef union {               /* one SQ_ALU_WORD1 word, two overlapping views */
+     unsigned int val : 32;   /* the word as a single 32-bit value */
+     sq_alu_word1_t f;        /* the same storage as named bit-fields */
+} sq_alu_word1_u;
+
+
+/*
+ * SQ_ALU_WORD1_OP2_V2 struct
+ *
+ * Layout constants for the SQ_ALU_WORD1_OP2_V2 fields.  For each field:
+ * _SIZE is its width in bits, _SHIFT the position of its least significant
+ * bit, _MASK the bits it occupies.
+ * The seven fields are contiguous and cover bits 0..17
+ * (SQ_ALU_WORD1_OP2_V2_MASK evaluates to 0x0003ffff); ALU_INST is the 11-bit
+ * field in bits 7..17.
+ */
+
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_OMOD_SIZE  2
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE 11
+
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT 0
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT 2
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT 3
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT 4
+#define SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT 5
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT 7
+
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK 0x00000001
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK 0x00000002
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK 0x00000004
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK 0x00000008
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK 0x00000010
+#define SQ_ALU_WORD1_OP2_V2_OMOD_MASK  0x00000060
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK 0x0003ff80
+
+#define SQ_ALU_WORD1_OP2_V2_MASK \
+     (SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK | \
+      SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK | \
+      SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK | \
+      SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK | \
+      SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK | \
+      SQ_ALU_WORD1_OP2_V2_OMOD_MASK | \
+      SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK)
+
+#define SQ_ALU_WORD1_OP2_V2_DEFAULT    0x0001cdcd /* equals 0xcdcdcdcd & SQ_ALU_WORD1_OP2_V2_MASK; its use is not shown in this part of the file */
+
+/*
+ * Field getters: isolate one field of a SQ_ALU_WORD1_OP2_V2 value with its
+ * mask and shift it down to bit 0.  The argument is parenthesized so that an
+ * expression such as (a | b) is masked as a whole.
+ */
+#define SQ_ALU_WORD1_OP2_V2_GET_SRC0_ABS(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK) >> SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_SRC1_ABS(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK) >> SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_UPDATE_EXECUTE_MASK(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK) >> SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_UPDATE_PRED(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK) >> SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_WRITE_MASK(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK) >> SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_OMOD(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_OMOD_MASK) >> SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_ALU_INST(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) >> SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT)
+
+/*
+ * Field setters: clear the field in the register lvalue, then OR in the new
+ * value.  Both arguments are parenthesized, the value is converted to
+ * unsigned before shifting, and it is clipped to the field mask so it cannot
+ * spill into neighbouring fields.  The register argument is evaluated twice.
+ */
+#define SQ_ALU_WORD1_OP2_V2_SET_SRC0_ABS(sq_alu_word1_op2_v2_reg, src0_abs) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK) | (((unsigned int)(src0_abs) << SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT) & SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_SRC1_ABS(sq_alu_word1_op2_v2_reg, src1_abs) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK) | (((unsigned int)(src1_abs) << SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT) & SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_UPDATE_EXECUTE_MASK(sq_alu_word1_op2_v2_reg, update_execute_mask) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK) | (((unsigned int)(update_execute_mask) << SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT) & SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_UPDATE_PRED(sq_alu_word1_op2_v2_reg, update_pred) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK) | (((unsigned int)(update_pred) << SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT) & SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_WRITE_MASK(sq_alu_word1_op2_v2_reg, write_mask) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK) | (((unsigned int)(write_mask) << SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT) & SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_OMOD(sq_alu_word1_op2_v2_reg, omod) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_OMOD_MASK) | (((unsigned int)(omod) << SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT) & SQ_ALU_WORD1_OP2_V2_OMOD_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_ALU_INST(sq_alu_word1_op2_v2_reg, alu_inst) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) | (((unsigned int)(alu_inst) << SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT) & SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_op2_v2_t {
+          unsigned int src0_abs                       : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+          unsigned int src1_abs                       : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+          unsigned int update_execute_mask            : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+          unsigned int update_pred                    : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+          unsigned int write_mask                     : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+          unsigned int omod                           : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+          unsigned int alu_inst                       : SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE;
+          unsigned int                                : 14;
+     } sq_alu_word1_op2_v2_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_op2_v2_t {
+          unsigned int                                : 14;
+          unsigned int alu_inst                       : SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE;
+          unsigned int omod                           : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+          unsigned int write_mask                     : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+          unsigned int update_pred                    : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+          unsigned int update_execute_mask            : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+          unsigned int src1_abs                       : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+          unsigned int src0_abs                       : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+     } sq_alu_word1_op2_v2_t;
+
+#endif
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_op2_r6xx_t {
+          unsigned int src0_abs                       : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+          unsigned int src1_abs                       : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+          unsigned int update_execute_mask            : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+          unsigned int update_pred                    : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+          unsigned int write_mask                     : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+          unsigned int fog_export                     : 1;
+          unsigned int omod                           : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+          unsigned int alu_inst                       : 10;
+          unsigned int                                : 14;
+     } sq_alu_word1_op2_v1_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_op2_r6xx_t {
+          unsigned int                                : 14;
+          unsigned int alu_inst                       : 10;
+          unsigned int omod                           : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+          unsigned int fog_export                     : 1;
+          unsigned int write_mask                     : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+          unsigned int update_pred                    : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+          unsigned int update_execute_mask            : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+          unsigned int src1_abs                       : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+          unsigned int src0_abs                       : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+     } sq_alu_word1_op2_v1_t;
+
+#endif
+
+typedef union {                  /* one SQ_ALU_WORD1_OP2 word, three overlapping views */
+     unsigned int val : 32;      /* the word as a single 32-bit value */
+     sq_alu_word1_op2_v2_t f;    /* bit-fields in the v2 layout */
+     sq_alu_word1_op2_v1_t f6;   /* bit-fields in the v1 layout (has fog_export, 10-bit alu_inst) */
+} sq_alu_word1_op2_v2_u;
+
+
+/*
+ * SQ_ALU_WORD1_OP3 struct
+ *
+ * Layout constants for the SQ_ALU_WORD1_OP3 fields.  For each field: _SIZE
+ * is its width in bits, _SHIFT the position of its least significant bit,
+ * _MASK the bits it occupies.
+ * The five fields are contiguous and cover bits 0..17
+ * (SQ_ALU_WORD1_OP3_MASK evaluates to 0x0003ffff); ALU_INST is the 5-bit
+ * field in bits 13..17.
+ */
+
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE 9
+#define SQ_ALU_WORD1_OP3_SRC2_REL_SIZE 1
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE 2
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE 1
+#define SQ_ALU_WORD1_OP3_ALU_INST_SIZE 5
+
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT 0
+#define SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT 9
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT 10
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT 12
+#define SQ_ALU_WORD1_OP3_ALU_INST_SHIFT 13
+
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_MASK 0x000001ff
+#define SQ_ALU_WORD1_OP3_SRC2_REL_MASK 0x00000200
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK 0x00000c00
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_MASK 0x00001000
+#define SQ_ALU_WORD1_OP3_ALU_INST_MASK 0x0003e000
+
+#define SQ_ALU_WORD1_OP3_MASK \
+     (SQ_ALU_WORD1_OP3_SRC2_SEL_MASK | \
+      SQ_ALU_WORD1_OP3_SRC2_REL_MASK | \
+      SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK | \
+      SQ_ALU_WORD1_OP3_SRC2_NEG_MASK | \
+      SQ_ALU_WORD1_OP3_ALU_INST_MASK)
+
+#define SQ_ALU_WORD1_OP3_DEFAULT       0x0001cdcd /* equals 0xcdcdcdcd & SQ_ALU_WORD1_OP3_MASK; its use is not shown in this part of the file */
+
+/*
+ * Field getters: isolate one field of a SQ_ALU_WORD1_OP3 value with its mask
+ * and shift it down to bit 0.  The argument is parenthesized so that an
+ * expression such as (a | b) is masked as a whole.
+ */
+#define SQ_ALU_WORD1_OP3_GET_SRC2_SEL(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_SRC2_SEL_MASK) >> SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_REL(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_SRC2_REL_MASK) >> SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_CHAN(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK) >> SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_NEG(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_SRC2_NEG_MASK) >> SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_ALU_INST(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_ALU_INST_MASK) >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT)
+
+/*
+ * Field setters: clear the field in the register lvalue, then OR in the new
+ * value.  Both arguments are parenthesized, the value is converted to
+ * unsigned before shifting, and it is clipped to the field mask so it cannot
+ * spill into neighbouring fields.  The register argument is evaluated twice.
+ */
+#define SQ_ALU_WORD1_OP3_SET_SRC2_SEL(sq_alu_word1_op3_reg, src2_sel) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_SRC2_SEL_MASK) | (((unsigned int)(src2_sel) << SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT) & SQ_ALU_WORD1_OP3_SRC2_SEL_MASK))
+#define SQ_ALU_WORD1_OP3_SET_SRC2_REL(sq_alu_word1_op3_reg, src2_rel) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_SRC2_REL_MASK) | (((unsigned int)(src2_rel) << SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT) & SQ_ALU_WORD1_OP3_SRC2_REL_MASK))
+#define SQ_ALU_WORD1_OP3_SET_SRC2_CHAN(sq_alu_word1_op3_reg, src2_chan) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK) | (((unsigned int)(src2_chan) << SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT) & SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK))
+#define SQ_ALU_WORD1_OP3_SET_SRC2_NEG(sq_alu_word1_op3_reg, src2_neg) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_SRC2_NEG_MASK) | (((unsigned int)(src2_neg) << SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT) & SQ_ALU_WORD1_OP3_SRC2_NEG_MASK))
+#define SQ_ALU_WORD1_OP3_SET_ALU_INST(sq_alu_word1_op3_reg, alu_inst) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_ALU_INST_MASK) | (((unsigned int)(alu_inst) << SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & SQ_ALU_WORD1_OP3_ALU_INST_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_op3_t {
+          unsigned int src2_sel                       : SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE;
+          unsigned int src2_rel                       : SQ_ALU_WORD1_OP3_SRC2_REL_SIZE;
+          unsigned int src2_chan                      : SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE;
+          unsigned int src2_neg                       : SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE;
+          unsigned int alu_inst                       : SQ_ALU_WORD1_OP3_ALU_INST_SIZE;
+          unsigned int                                : 14;
+     } sq_alu_word1_op3_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_op3_t {
+          unsigned int                                : 14;
+          unsigned int alu_inst                       : SQ_ALU_WORD1_OP3_ALU_INST_SIZE;
+          unsigned int src2_neg                       : SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE;
+          unsigned int src2_chan                      : SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE;
+          unsigned int src2_rel                       : SQ_ALU_WORD1_OP3_SRC2_REL_SIZE;
+          unsigned int src2_sel                       : SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE;
+     } sq_alu_word1_op3_t;
+
+#endif
+
+typedef union {               /* one SQ_ALU_WORD1_OP3 word, two overlapping views */
+     unsigned int val : 32;   /* the word as a single 32-bit value */
+     sq_alu_word1_op3_t f;    /* the same storage as named bit-fields */
+} sq_alu_word1_op3_u;
+
+
+/*
+ * SQ_TEX_WORD0 struct
+ *
+ * Layout constants for the SQ_TEX_WORD0 fields.  For each field: _SIZE is
+ * its width in bits, _SHIFT the position of its least significant bit,
+ * _MASK the bits it occupies.
+ * Bit 6 (between BC_FRAC_MODE and FETCH_WHOLE_QUAD) and bits 25..31 have no
+ * field, so SQ_TEX_WORD0_MASK evaluates to 0x01ffffbf.
+ */
+
+#define SQ_TEX_WORD0_TEX_INST_SIZE     5
+#define SQ_TEX_WORD0_BC_FRAC_MODE_SIZE 1
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE 1
+#define SQ_TEX_WORD0_RESOURCE_ID_SIZE  8
+#define SQ_TEX_WORD0_SRC_GPR_SIZE      7
+#define SQ_TEX_WORD0_SRC_REL_SIZE      1
+#define SQ_TEX_WORD0_ALT_CONST_SIZE    1
+
+#define SQ_TEX_WORD0_TEX_INST_SHIFT    0
+#define SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT 5
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT 7
+#define SQ_TEX_WORD0_RESOURCE_ID_SHIFT 8
+#define SQ_TEX_WORD0_SRC_GPR_SHIFT     16
+#define SQ_TEX_WORD0_SRC_REL_SHIFT     23
+#define SQ_TEX_WORD0_ALT_CONST_SHIFT   24
+
+#define SQ_TEX_WORD0_TEX_INST_MASK     0x0000001f
+#define SQ_TEX_WORD0_BC_FRAC_MODE_MASK 0x00000020
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK 0x00000080
+#define SQ_TEX_WORD0_RESOURCE_ID_MASK  0x0000ff00
+#define SQ_TEX_WORD0_SRC_GPR_MASK      0x007f0000
+#define SQ_TEX_WORD0_SRC_REL_MASK      0x00800000
+#define SQ_TEX_WORD0_ALT_CONST_MASK    0x01000000
+
+#define SQ_TEX_WORD0_MASK \
+     (SQ_TEX_WORD0_TEX_INST_MASK | \
+      SQ_TEX_WORD0_BC_FRAC_MODE_MASK | \
+      SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK | \
+      SQ_TEX_WORD0_RESOURCE_ID_MASK | \
+      SQ_TEX_WORD0_SRC_GPR_MASK | \
+      SQ_TEX_WORD0_SRC_REL_MASK | \
+      SQ_TEX_WORD0_ALT_CONST_MASK)
+
+#define SQ_TEX_WORD0_DEFAULT           0x01cdcd8d /* equals 0xcdcdcdcd & SQ_TEX_WORD0_MASK; its use is not shown in this part of the file */
+
+/*
+ * Field getters: isolate one field of a SQ_TEX_WORD0 value with its mask and
+ * shift it down to bit 0.  The argument is parenthesized so that an
+ * expression such as (a | b) is masked as a whole.
+ */
+#define SQ_TEX_WORD0_GET_TEX_INST(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_TEX_INST_MASK) >> SQ_TEX_WORD0_TEX_INST_SHIFT)
+#define SQ_TEX_WORD0_GET_BC_FRAC_MODE(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_BC_FRAC_MODE_MASK) >> SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT)
+#define SQ_TEX_WORD0_GET_FETCH_WHOLE_QUAD(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK) >> SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT)
+#define SQ_TEX_WORD0_GET_RESOURCE_ID(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_RESOURCE_ID_MASK) >> SQ_TEX_WORD0_RESOURCE_ID_SHIFT)
+#define SQ_TEX_WORD0_GET_SRC_GPR(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_SRC_GPR_MASK) >> SQ_TEX_WORD0_SRC_GPR_SHIFT)
+#define SQ_TEX_WORD0_GET_SRC_REL(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_SRC_REL_MASK) >> SQ_TEX_WORD0_SRC_REL_SHIFT)
+#define SQ_TEX_WORD0_GET_ALT_CONST(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_ALT_CONST_MASK) >> SQ_TEX_WORD0_ALT_CONST_SHIFT)
+
+/*
+ * Field setters: clear the field in the register lvalue, then OR in the new
+ * value.  Both arguments are parenthesized, the value is converted to
+ * unsigned before shifting, and it is clipped to the field mask so it cannot
+ * spill into neighbouring fields.  The register argument is evaluated twice.
+ */
+#define SQ_TEX_WORD0_SET_TEX_INST(sq_tex_word0_reg, tex_inst) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_TEX_INST_MASK) | (((unsigned int)(tex_inst) << SQ_TEX_WORD0_TEX_INST_SHIFT) & SQ_TEX_WORD0_TEX_INST_MASK))
+#define SQ_TEX_WORD0_SET_BC_FRAC_MODE(sq_tex_word0_reg, bc_frac_mode) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_BC_FRAC_MODE_MASK) | (((unsigned int)(bc_frac_mode) << SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT) & SQ_TEX_WORD0_BC_FRAC_MODE_MASK))
+#define SQ_TEX_WORD0_SET_FETCH_WHOLE_QUAD(sq_tex_word0_reg, fetch_whole_quad) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK) | (((unsigned int)(fetch_whole_quad) << SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT) & SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK))
+#define SQ_TEX_WORD0_SET_RESOURCE_ID(sq_tex_word0_reg, resource_id) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_RESOURCE_ID_MASK) | (((unsigned int)(resource_id) << SQ_TEX_WORD0_RESOURCE_ID_SHIFT) & SQ_TEX_WORD0_RESOURCE_ID_MASK))
+#define SQ_TEX_WORD0_SET_SRC_GPR(sq_tex_word0_reg, src_gpr) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_SRC_GPR_MASK) | (((unsigned int)(src_gpr) << SQ_TEX_WORD0_SRC_GPR_SHIFT) & SQ_TEX_WORD0_SRC_GPR_MASK))
+#define SQ_TEX_WORD0_SET_SRC_REL(sq_tex_word0_reg, src_rel) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_SRC_REL_MASK) | (((unsigned int)(src_rel) << SQ_TEX_WORD0_SRC_REL_SHIFT) & SQ_TEX_WORD0_SRC_REL_MASK))
+#define SQ_TEX_WORD0_SET_ALT_CONST(sq_tex_word0_reg, alt_const) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_ALT_CONST_MASK) | (((unsigned int)(alt_const) << SQ_TEX_WORD0_ALT_CONST_SHIFT) & SQ_TEX_WORD0_ALT_CONST_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_tex_word0_t {
+          unsigned int tex_inst                       : SQ_TEX_WORD0_TEX_INST_SIZE;
+          unsigned int bc_frac_mode                   : SQ_TEX_WORD0_BC_FRAC_MODE_SIZE;
+          unsigned int                                : 1;
+          unsigned int fetch_whole_quad               : SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+          unsigned int resource_id                    : SQ_TEX_WORD0_RESOURCE_ID_SIZE;
+          unsigned int src_gpr                        : SQ_TEX_WORD0_SRC_GPR_SIZE;
+          unsigned int src_rel                        : SQ_TEX_WORD0_SRC_REL_SIZE;
+          unsigned int alt_const                      : SQ_TEX_WORD0_ALT_CONST_SIZE;
+          unsigned int                                : 7;
+     } sq_tex_word0_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_tex_word0_t {
+          unsigned int                                : 7;
+          unsigned int alt_const                      : SQ_TEX_WORD0_ALT_CONST_SIZE;
+          unsigned int src_rel                        : SQ_TEX_WORD0_SRC_REL_SIZE;
+          unsigned int src_gpr                        : SQ_TEX_WORD0_SRC_GPR_SIZE;
+          unsigned int resource_id                    : SQ_TEX_WORD0_RESOURCE_ID_SIZE;
+          unsigned int fetch_whole_quad               : SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+          unsigned int                                : 1;
+          unsigned int bc_frac_mode                   : SQ_TEX_WORD0_BC_FRAC_MODE_SIZE;
+          unsigned int tex_inst                       : SQ_TEX_WORD0_TEX_INST_SIZE;
+     } sq_tex_word0_t;
+
+#endif
+
+typedef union {               /* one SQ_TEX_WORD0 word, two overlapping views */
+     unsigned int val : 32;   /* the word as a single 32-bit value */
+     sq_tex_word0_t f;        /* the same storage as named bit-fields */
+} sq_tex_word0_u;
+
+
+/*
+ * SQ_TEX_WORD1 struct
+ *
+ * Layout constants for the SQ_TEX_WORD1 fields.  For each field: _SIZE is
+ * its width in bits, _SHIFT the position of its least significant bit,
+ * _MASK the bits it occupies.
+ * Bit 8 (between DST_REL and DST_SEL_X) has no field, so SQ_TEX_WORD1_MASK
+ * evaluates to 0xfffffeff.
+ */
+
+#define SQ_TEX_WORD1_DST_GPR_SIZE      7
+#define SQ_TEX_WORD1_DST_REL_SIZE      1
+#define SQ_TEX_WORD1_DST_SEL_X_SIZE    3
+#define SQ_TEX_WORD1_DST_SEL_Y_SIZE    3
+#define SQ_TEX_WORD1_DST_SEL_Z_SIZE    3
+#define SQ_TEX_WORD1_DST_SEL_W_SIZE    3
+#define SQ_TEX_WORD1_LOD_BIAS_SIZE     7
+#define SQ_TEX_WORD1_COORD_TYPE_X_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_Y_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_Z_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_W_SIZE 1
+
+#define SQ_TEX_WORD1_DST_GPR_SHIFT     0
+#define SQ_TEX_WORD1_DST_REL_SHIFT     7
+#define SQ_TEX_WORD1_DST_SEL_X_SHIFT   9
+#define SQ_TEX_WORD1_DST_SEL_Y_SHIFT   12
+#define SQ_TEX_WORD1_DST_SEL_Z_SHIFT   15
+#define SQ_TEX_WORD1_DST_SEL_W_SHIFT   18
+#define SQ_TEX_WORD1_LOD_BIAS_SHIFT    21
+#define SQ_TEX_WORD1_COORD_TYPE_X_SHIFT 28
+#define SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT 29
+#define SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT 30
+#define SQ_TEX_WORD1_COORD_TYPE_W_SHIFT 31
+
+#define SQ_TEX_WORD1_DST_GPR_MASK      0x0000007f
+#define SQ_TEX_WORD1_DST_REL_MASK      0x00000080
+#define SQ_TEX_WORD1_DST_SEL_X_MASK    0x00000e00
+#define SQ_TEX_WORD1_DST_SEL_Y_MASK    0x00007000
+#define SQ_TEX_WORD1_DST_SEL_Z_MASK    0x00038000
+#define SQ_TEX_WORD1_DST_SEL_W_MASK    0x001c0000
+#define SQ_TEX_WORD1_LOD_BIAS_MASK     0x0fe00000
+#define SQ_TEX_WORD1_COORD_TYPE_X_MASK 0x10000000
+#define SQ_TEX_WORD1_COORD_TYPE_Y_MASK 0x20000000
+#define SQ_TEX_WORD1_COORD_TYPE_Z_MASK 0x40000000
+#define SQ_TEX_WORD1_COORD_TYPE_W_MASK 0x80000000
+
+#define SQ_TEX_WORD1_MASK \
+     (SQ_TEX_WORD1_DST_GPR_MASK | \
+      SQ_TEX_WORD1_DST_REL_MASK | \
+      SQ_TEX_WORD1_DST_SEL_X_MASK | \
+      SQ_TEX_WORD1_DST_SEL_Y_MASK | \
+      SQ_TEX_WORD1_DST_SEL_Z_MASK | \
+      SQ_TEX_WORD1_DST_SEL_W_MASK | \
+      SQ_TEX_WORD1_LOD_BIAS_MASK | \
+      SQ_TEX_WORD1_COORD_TYPE_X_MASK | \
+      SQ_TEX_WORD1_COORD_TYPE_Y_MASK | \
+      SQ_TEX_WORD1_COORD_TYPE_Z_MASK | \
+      SQ_TEX_WORD1_COORD_TYPE_W_MASK)
+
+#define SQ_TEX_WORD1_DEFAULT           0xcdcdcccd /* equals 0xcdcdcdcd & SQ_TEX_WORD1_MASK; its use is not shown in this part of the file */
+
+/*
+ * Field getters: isolate one field of a SQ_TEX_WORD1 value with its mask and
+ * shift it down to bit 0.  The argument is parenthesized so that an
+ * expression such as (a | b) is masked as a whole.
+ */
+#define SQ_TEX_WORD1_GET_DST_GPR(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_GPR_MASK) >> SQ_TEX_WORD1_DST_GPR_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_REL(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_REL_MASK) >> SQ_TEX_WORD1_DST_REL_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_X(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_SEL_X_MASK) >> SQ_TEX_WORD1_DST_SEL_X_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_Y(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_SEL_Y_MASK) >> SQ_TEX_WORD1_DST_SEL_Y_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_Z(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_SEL_Z_MASK) >> SQ_TEX_WORD1_DST_SEL_Z_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_W(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_SEL_W_MASK) >> SQ_TEX_WORD1_DST_SEL_W_SHIFT)
+#define SQ_TEX_WORD1_GET_LOD_BIAS(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_LOD_BIAS_MASK) >> SQ_TEX_WORD1_LOD_BIAS_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_X(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_COORD_TYPE_X_MASK) >> SQ_TEX_WORD1_COORD_TYPE_X_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_Y(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_COORD_TYPE_Y_MASK) >> SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_Z(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_COORD_TYPE_Z_MASK) >> SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_W(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_COORD_TYPE_W_MASK) >> SQ_TEX_WORD1_COORD_TYPE_W_SHIFT)
+
+/*
+ * Field setters: replace one field of the lvalue <reg> with <value>, leaving
+ * every other bit untouched.  Each expands to an assignment expression.
+ *  - Both arguments are parenthesized so compound expressions keep their
+ *    meaning.
+ *  - The value is converted to unsigned int before shifting (left-shifting a
+ *    signed value into or past the sign bit is undefined behaviour) and is
+ *    then masked so an out-of-range value cannot spill into other fields.
+ *  - <reg> is evaluated twice; do not pass an expression with side effects.
+ */
+#define SQ_TEX_WORD1_SET_DST_GPR(sq_tex_word1_reg, dst_gpr) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_GPR_MASK) | \
+      (((unsigned int)(dst_gpr) << SQ_TEX_WORD1_DST_GPR_SHIFT) & SQ_TEX_WORD1_DST_GPR_MASK))
+#define SQ_TEX_WORD1_SET_DST_REL(sq_tex_word1_reg, dst_rel) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_REL_MASK) | \
+      (((unsigned int)(dst_rel) << SQ_TEX_WORD1_DST_REL_SHIFT) & SQ_TEX_WORD1_DST_REL_MASK))
+#define SQ_TEX_WORD1_SET_DST_SEL_X(sq_tex_word1_reg, dst_sel_x) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_SEL_X_MASK) | \
+      (((unsigned int)(dst_sel_x) << SQ_TEX_WORD1_DST_SEL_X_SHIFT) & SQ_TEX_WORD1_DST_SEL_X_MASK))
+#define SQ_TEX_WORD1_SET_DST_SEL_Y(sq_tex_word1_reg, dst_sel_y) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_SEL_Y_MASK) | \
+      (((unsigned int)(dst_sel_y) << SQ_TEX_WORD1_DST_SEL_Y_SHIFT) & SQ_TEX_WORD1_DST_SEL_Y_MASK))
+#define SQ_TEX_WORD1_SET_DST_SEL_Z(sq_tex_word1_reg, dst_sel_z) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_SEL_Z_MASK) | \
+      (((unsigned int)(dst_sel_z) << SQ_TEX_WORD1_DST_SEL_Z_SHIFT) & SQ_TEX_WORD1_DST_SEL_Z_MASK))
+#define SQ_TEX_WORD1_SET_DST_SEL_W(sq_tex_word1_reg, dst_sel_w) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_SEL_W_MASK) | \
+      (((unsigned int)(dst_sel_w) << SQ_TEX_WORD1_DST_SEL_W_SHIFT) & SQ_TEX_WORD1_DST_SEL_W_MASK))
+#define SQ_TEX_WORD1_SET_LOD_BIAS(sq_tex_word1_reg, lod_bias) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_LOD_BIAS_MASK) | \
+      (((unsigned int)(lod_bias) << SQ_TEX_WORD1_LOD_BIAS_SHIFT) & SQ_TEX_WORD1_LOD_BIAS_MASK))
+#define SQ_TEX_WORD1_SET_COORD_TYPE_X(sq_tex_word1_reg, coord_type_x) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_COORD_TYPE_X_MASK) | \
+      (((unsigned int)(coord_type_x) << SQ_TEX_WORD1_COORD_TYPE_X_SHIFT) & SQ_TEX_WORD1_COORD_TYPE_X_MASK))
+#define SQ_TEX_WORD1_SET_COORD_TYPE_Y(sq_tex_word1_reg, coord_type_y) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_COORD_TYPE_Y_MASK) | \
+      (((unsigned int)(coord_type_y) << SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT) & SQ_TEX_WORD1_COORD_TYPE_Y_MASK))
+#define SQ_TEX_WORD1_SET_COORD_TYPE_Z(sq_tex_word1_reg, coord_type_z) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_COORD_TYPE_Z_MASK) | \
+      (((unsigned int)(coord_type_z) << SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT) & SQ_TEX_WORD1_COORD_TYPE_Z_MASK))
+#define SQ_TEX_WORD1_SET_COORD_TYPE_W(sq_tex_word1_reg, coord_type_w) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_COORD_TYPE_W_MASK) | \
+      (((unsigned int)(coord_type_w) << SQ_TEX_WORD1_COORD_TYPE_W_SHIFT) & SQ_TEX_WORD1_COORD_TYPE_W_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_tex_word1_t {
+          unsigned int dst_gpr                        : SQ_TEX_WORD1_DST_GPR_SIZE;
+          unsigned int dst_rel                        : SQ_TEX_WORD1_DST_REL_SIZE;
+          unsigned int                                : 1;
+          unsigned int dst_sel_x                      : SQ_TEX_WORD1_DST_SEL_X_SIZE;
+          unsigned int dst_sel_y                      : SQ_TEX_WORD1_DST_SEL_Y_SIZE;
+          unsigned int dst_sel_z                      : SQ_TEX_WORD1_DST_SEL_Z_SIZE;
+          unsigned int dst_sel_w                      : SQ_TEX_WORD1_DST_SEL_W_SIZE;
+          unsigned int lod_bias                       : SQ_TEX_WORD1_LOD_BIAS_SIZE;
+          unsigned int coord_type_x                   : SQ_TEX_WORD1_COORD_TYPE_X_SIZE;
+          unsigned int coord_type_y                   : SQ_TEX_WORD1_COORD_TYPE_Y_SIZE;
+          unsigned int coord_type_z                   : SQ_TEX_WORD1_COORD_TYPE_Z_SIZE;
+          unsigned int coord_type_w                   : SQ_TEX_WORD1_COORD_TYPE_W_SIZE;
+     } sq_tex_word1_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_tex_word1_t {
+          unsigned int coord_type_w                   : SQ_TEX_WORD1_COORD_TYPE_W_SIZE;
+          unsigned int coord_type_z                   : SQ_TEX_WORD1_COORD_TYPE_Z_SIZE;
+          unsigned int coord_type_y                   : SQ_TEX_WORD1_COORD_TYPE_Y_SIZE;
+          unsigned int coord_type_x                   : SQ_TEX_WORD1_COORD_TYPE_X_SIZE;
+          unsigned int lod_bias                       : SQ_TEX_WORD1_LOD_BIAS_SIZE;
+          unsigned int dst_sel_w                      : SQ_TEX_WORD1_DST_SEL_W_SIZE;
+          unsigned int dst_sel_z                      : SQ_TEX_WORD1_DST_SEL_Z_SIZE;
+          unsigned int dst_sel_y                      : SQ_TEX_WORD1_DST_SEL_Y_SIZE;
+          unsigned int dst_sel_x                      : SQ_TEX_WORD1_DST_SEL_X_SIZE;
+          unsigned int                                : 1;
+          unsigned int dst_rel                        : SQ_TEX_WORD1_DST_REL_SIZE;
+          unsigned int dst_gpr                        : SQ_TEX_WORD1_DST_GPR_SIZE;
+     } sq_tex_word1_t;
+
+#endif
+
+typedef union {
+     unsigned int val : 32;
+     sq_tex_word1_t f;
+} sq_tex_word1_u;
+
+
+/*
+ * SQ_TEX_WORD2 struct
+ *
+ * Bit-field layout constants for the 32-bit SQ_TEX_WORD2 word.  For every
+ * field FOO three constants are provided:
+ *   SQ_TEX_WORD2_FOO_SIZE  - width of the field in bits
+ *   SQ_TEX_WORD2_FOO_SHIFT - bit position of the field's least-significant bit
+ *   SQ_TEX_WORD2_FOO_MASK  - in-place mask, ((1 << SIZE) - 1) << SHIFT
+ * The eight fields tile all 32 bits with no gaps.
+ */
+
+#define SQ_TEX_WORD2_OFFSET_X_SIZE     5
+#define SQ_TEX_WORD2_OFFSET_Y_SIZE     5
+#define SQ_TEX_WORD2_OFFSET_Z_SIZE     5
+#define SQ_TEX_WORD2_SAMPLER_ID_SIZE   5
+#define SQ_TEX_WORD2_SRC_SEL_X_SIZE    3
+#define SQ_TEX_WORD2_SRC_SEL_Y_SIZE    3
+#define SQ_TEX_WORD2_SRC_SEL_Z_SIZE    3
+#define SQ_TEX_WORD2_SRC_SEL_W_SIZE    3
+
+#define SQ_TEX_WORD2_OFFSET_X_SHIFT    0
+#define SQ_TEX_WORD2_OFFSET_Y_SHIFT    5
+#define SQ_TEX_WORD2_OFFSET_Z_SHIFT    10
+#define SQ_TEX_WORD2_SAMPLER_ID_SHIFT  15
+#define SQ_TEX_WORD2_SRC_SEL_X_SHIFT   20
+#define SQ_TEX_WORD2_SRC_SEL_Y_SHIFT   23
+#define SQ_TEX_WORD2_SRC_SEL_Z_SHIFT   26
+#define SQ_TEX_WORD2_SRC_SEL_W_SHIFT   29
+
+#define SQ_TEX_WORD2_OFFSET_X_MASK     0x0000001f
+#define SQ_TEX_WORD2_OFFSET_Y_MASK     0x000003e0
+#define SQ_TEX_WORD2_OFFSET_Z_MASK     0x00007c00
+#define SQ_TEX_WORD2_SAMPLER_ID_MASK   0x000f8000
+#define SQ_TEX_WORD2_SRC_SEL_X_MASK    0x00700000
+#define SQ_TEX_WORD2_SRC_SEL_Y_MASK    0x03800000
+#define SQ_TEX_WORD2_SRC_SEL_Z_MASK    0x1c000000
+#define SQ_TEX_WORD2_SRC_SEL_W_MASK    0xe0000000
+/* Union of all field masks (all 32 bits for this word). */
+#define SQ_TEX_WORD2_MASK \
+     (SQ_TEX_WORD2_OFFSET_X_MASK | \
+      SQ_TEX_WORD2_OFFSET_Y_MASK | \
+      SQ_TEX_WORD2_OFFSET_Z_MASK | \
+      SQ_TEX_WORD2_SAMPLER_ID_MASK | \
+      SQ_TEX_WORD2_SRC_SEL_X_MASK | \
+      SQ_TEX_WORD2_SRC_SEL_Y_MASK | \
+      SQ_TEX_WORD2_SRC_SEL_Z_MASK | \
+      SQ_TEX_WORD2_SRC_SEL_W_MASK)
+/* NOTE(review): the intended use of this value is not visible here. */
+#define SQ_TEX_WORD2_DEFAULT           0xcdcdcdcd
+
+/*
+ * Field extractors: isolate one field of a raw SQ_TEX_WORD2 value and shift
+ * it down so the field's least-significant bit lands at bit 0.
+ * The argument is parenthesized so that a compound expression such as
+ * "a | b" is masked as a whole; it is evaluated exactly once.
+ */
+#define SQ_TEX_WORD2_GET_OFFSET_X(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_OFFSET_X_MASK) >> SQ_TEX_WORD2_OFFSET_X_SHIFT)
+#define SQ_TEX_WORD2_GET_OFFSET_Y(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_OFFSET_Y_MASK) >> SQ_TEX_WORD2_OFFSET_Y_SHIFT)
+#define SQ_TEX_WORD2_GET_OFFSET_Z(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_OFFSET_Z_MASK) >> SQ_TEX_WORD2_OFFSET_Z_SHIFT)
+#define SQ_TEX_WORD2_GET_SAMPLER_ID(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SAMPLER_ID_MASK) >> SQ_TEX_WORD2_SAMPLER_ID_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_X(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SRC_SEL_X_MASK) >> SQ_TEX_WORD2_SRC_SEL_X_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_Y(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SRC_SEL_Y_MASK) >> SQ_TEX_WORD2_SRC_SEL_Y_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_Z(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SRC_SEL_Z_MASK) >> SQ_TEX_WORD2_SRC_SEL_Z_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_W(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SRC_SEL_W_MASK) >> SQ_TEX_WORD2_SRC_SEL_W_SHIFT)
+
+/*
+ * Field setters: replace one field of the lvalue <reg> with <value>, leaving
+ * every other bit untouched.  Each expands to an assignment expression.
+ *  - Both arguments are parenthesized so compound expressions keep their
+ *    meaning.
+ *  - The value is converted to unsigned int before shifting (left-shifting a
+ *    signed value into or past the sign bit is undefined behaviour) and is
+ *    then masked so an out-of-range value cannot spill into other fields.
+ *  - <reg> is evaluated twice; do not pass an expression with side effects.
+ */
+#define SQ_TEX_WORD2_SET_OFFSET_X(sq_tex_word2_reg, offset_x) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_OFFSET_X_MASK) | \
+      (((unsigned int)(offset_x) << SQ_TEX_WORD2_OFFSET_X_SHIFT) & SQ_TEX_WORD2_OFFSET_X_MASK))
+#define SQ_TEX_WORD2_SET_OFFSET_Y(sq_tex_word2_reg, offset_y) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_OFFSET_Y_MASK) | \
+      (((unsigned int)(offset_y) << SQ_TEX_WORD2_OFFSET_Y_SHIFT) & SQ_TEX_WORD2_OFFSET_Y_MASK))
+#define SQ_TEX_WORD2_SET_OFFSET_Z(sq_tex_word2_reg, offset_z) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_OFFSET_Z_MASK) | \
+      (((unsigned int)(offset_z) << SQ_TEX_WORD2_OFFSET_Z_SHIFT) & SQ_TEX_WORD2_OFFSET_Z_MASK))
+#define SQ_TEX_WORD2_SET_SAMPLER_ID(sq_tex_word2_reg, sampler_id) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SAMPLER_ID_MASK) | \
+      (((unsigned int)(sampler_id) << SQ_TEX_WORD2_SAMPLER_ID_SHIFT) & SQ_TEX_WORD2_SAMPLER_ID_MASK))
+#define SQ_TEX_WORD2_SET_SRC_SEL_X(sq_tex_word2_reg, src_sel_x) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SRC_SEL_X_MASK) | \
+      (((unsigned int)(src_sel_x) << SQ_TEX_WORD2_SRC_SEL_X_SHIFT) & SQ_TEX_WORD2_SRC_SEL_X_MASK))
+#define SQ_TEX_WORD2_SET_SRC_SEL_Y(sq_tex_word2_reg, src_sel_y) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SRC_SEL_Y_MASK) | \
+      (((unsigned int)(src_sel_y) << SQ_TEX_WORD2_SRC_SEL_Y_SHIFT) & SQ_TEX_WORD2_SRC_SEL_Y_MASK))
+#define SQ_TEX_WORD2_SET_SRC_SEL_Z(sq_tex_word2_reg, src_sel_z) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SRC_SEL_Z_MASK) | \
+      (((unsigned int)(src_sel_z) << SQ_TEX_WORD2_SRC_SEL_Z_SHIFT) & SQ_TEX_WORD2_SRC_SEL_Z_MASK))
+#define SQ_TEX_WORD2_SET_SRC_SEL_W(sq_tex_word2_reg, src_sel_w) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SRC_SEL_W_MASK) | \
+      (((unsigned int)(src_sel_w) << SQ_TEX_WORD2_SRC_SEL_W_SHIFT) & SQ_TEX_WORD2_SRC_SEL_W_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_tex_word2_t {
+          unsigned int offset_x                       : SQ_TEX_WORD2_OFFSET_X_SIZE;
+          unsigned int offset_y                       : SQ_TEX_WORD2_OFFSET_Y_SIZE;
+          unsigned int offset_z                       : SQ_TEX_WORD2_OFFSET_Z_SIZE;
+          unsigned int sampler_id                     : SQ_TEX_WORD2_SAMPLER_ID_SIZE;
+          unsigned int src_sel_x                      : SQ_TEX_WORD2_SRC_SEL_X_SIZE;
+          unsigned int src_sel_y                      : SQ_TEX_WORD2_SRC_SEL_Y_SIZE;
+          unsigned int src_sel_z                      : SQ_TEX_WORD2_SRC_SEL_Z_SIZE;
+          unsigned int src_sel_w                      : SQ_TEX_WORD2_SRC_SEL_W_SIZE;
+     } sq_tex_word2_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_tex_word2_t {
+          unsigned int src_sel_w                      : SQ_TEX_WORD2_SRC_SEL_W_SIZE;
+          unsigned int src_sel_z                      : SQ_TEX_WORD2_SRC_SEL_Z_SIZE;
+          unsigned int src_sel_y                      : SQ_TEX_WORD2_SRC_SEL_Y_SIZE;
+          unsigned int src_sel_x                      : SQ_TEX_WORD2_SRC_SEL_X_SIZE;
+          unsigned int sampler_id                     : SQ_TEX_WORD2_SAMPLER_ID_SIZE;
+          unsigned int offset_z                       : SQ_TEX_WORD2_OFFSET_Z_SIZE;
+          unsigned int offset_y                       : SQ_TEX_WORD2_OFFSET_Y_SIZE;
+          unsigned int offset_x                       : SQ_TEX_WORD2_OFFSET_X_SIZE;
+     } sq_tex_word2_t;
+
+#endif
+
+typedef union {
+     unsigned int val : 32;
+     sq_tex_word2_t f;
+} sq_tex_word2_u;
+
+
+/*
+ * SQ_VTX_WORD0 struct
+ *
+ * Bit-field layout constants for the 32-bit SQ_VTX_WORD0 word.  For every
+ * field FOO three constants are provided:
+ *   SQ_VTX_WORD0_FOO_SIZE  - width of the field in bits
+ *   SQ_VTX_WORD0_FOO_SHIFT - bit position of the field's least-significant bit
+ *   SQ_VTX_WORD0_FOO_MASK  - in-place mask, ((1 << SIZE) - 1) << SHIFT
+ * The eight fields tile all 32 bits with no gaps.
+ */
+
+#define SQ_VTX_WORD0_VTX_INST_SIZE     5
+#define SQ_VTX_WORD0_FETCH_TYPE_SIZE   2
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE 1
+#define SQ_VTX_WORD0_BUFFER_ID_SIZE    8
+#define SQ_VTX_WORD0_SRC_GPR_SIZE      7
+#define SQ_VTX_WORD0_SRC_REL_SIZE      1
+#define SQ_VTX_WORD0_SRC_SEL_X_SIZE    2
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE 6
+
+#define SQ_VTX_WORD0_VTX_INST_SHIFT    0
+#define SQ_VTX_WORD0_FETCH_TYPE_SHIFT  5
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT 7
+#define SQ_VTX_WORD0_BUFFER_ID_SHIFT   8
+#define SQ_VTX_WORD0_SRC_GPR_SHIFT     16
+#define SQ_VTX_WORD0_SRC_REL_SHIFT     23
+#define SQ_VTX_WORD0_SRC_SEL_X_SHIFT   24
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT 26
+
+#define SQ_VTX_WORD0_VTX_INST_MASK     0x0000001f
+#define SQ_VTX_WORD0_FETCH_TYPE_MASK   0x00000060
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK 0x00000080
+#define SQ_VTX_WORD0_BUFFER_ID_MASK    0x0000ff00
+#define SQ_VTX_WORD0_SRC_GPR_MASK      0x007f0000
+#define SQ_VTX_WORD0_SRC_REL_MASK      0x00800000
+#define SQ_VTX_WORD0_SRC_SEL_X_MASK    0x03000000
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK 0xfc000000
+/* Union of all field masks (all 32 bits for this word). */
+#define SQ_VTX_WORD0_MASK \
+     (SQ_VTX_WORD0_VTX_INST_MASK | \
+      SQ_VTX_WORD0_FETCH_TYPE_MASK | \
+      SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK | \
+      SQ_VTX_WORD0_BUFFER_ID_MASK | \
+      SQ_VTX_WORD0_SRC_GPR_MASK | \
+      SQ_VTX_WORD0_SRC_REL_MASK | \
+      SQ_VTX_WORD0_SRC_SEL_X_MASK | \
+      SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK)
+/* NOTE(review): the intended use of this value is not visible here. */
+#define SQ_VTX_WORD0_DEFAULT           0xcdcdcdcd
+
+/*
+ * Field extractors: isolate one field of a raw SQ_VTX_WORD0 value and shift
+ * it down so the field's least-significant bit lands at bit 0.
+ * The argument is parenthesized so that a compound expression such as
+ * "a | b" is masked as a whole; it is evaluated exactly once.
+ */
+#define SQ_VTX_WORD0_GET_VTX_INST(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_VTX_INST_MASK) >> SQ_VTX_WORD0_VTX_INST_SHIFT)
+#define SQ_VTX_WORD0_GET_FETCH_TYPE(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_FETCH_TYPE_MASK) >> SQ_VTX_WORD0_FETCH_TYPE_SHIFT)
+#define SQ_VTX_WORD0_GET_FETCH_WHOLE_QUAD(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK) >> SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT)
+#define SQ_VTX_WORD0_GET_BUFFER_ID(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_BUFFER_ID_MASK) >> SQ_VTX_WORD0_BUFFER_ID_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_GPR(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_SRC_GPR_MASK) >> SQ_VTX_WORD0_SRC_GPR_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_REL(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_SRC_REL_MASK) >> SQ_VTX_WORD0_SRC_REL_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_SEL_X(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_SRC_SEL_X_MASK) >> SQ_VTX_WORD0_SRC_SEL_X_SHIFT)
+#define SQ_VTX_WORD0_GET_MEGA_FETCH_COUNT(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) >> SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT)
+
+/*
+ * Field setters: replace one field of the lvalue <reg> with <value>, leaving
+ * every other bit untouched.  Each expands to an assignment expression.
+ *  - Both arguments are parenthesized so compound expressions keep their
+ *    meaning.
+ *  - The value is converted to unsigned int before shifting (left-shifting a
+ *    signed value into or past the sign bit is undefined behaviour) and is
+ *    then masked so an out-of-range value cannot spill into other fields.
+ *  - <reg> is evaluated twice; do not pass an expression with side effects.
+ */
+#define SQ_VTX_WORD0_SET_VTX_INST(sq_vtx_word0_reg, vtx_inst) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_VTX_INST_MASK) | \
+      (((unsigned int)(vtx_inst) << SQ_VTX_WORD0_VTX_INST_SHIFT) & SQ_VTX_WORD0_VTX_INST_MASK))
+#define SQ_VTX_WORD0_SET_FETCH_TYPE(sq_vtx_word0_reg, fetch_type) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_FETCH_TYPE_MASK) | \
+      (((unsigned int)(fetch_type) << SQ_VTX_WORD0_FETCH_TYPE_SHIFT) & SQ_VTX_WORD0_FETCH_TYPE_MASK))
+#define SQ_VTX_WORD0_SET_FETCH_WHOLE_QUAD(sq_vtx_word0_reg, fetch_whole_quad) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK) | \
+      (((unsigned int)(fetch_whole_quad) << SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT) & SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK))
+#define SQ_VTX_WORD0_SET_BUFFER_ID(sq_vtx_word0_reg, buffer_id) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_BUFFER_ID_MASK) | \
+      (((unsigned int)(buffer_id) << SQ_VTX_WORD0_BUFFER_ID_SHIFT) & SQ_VTX_WORD0_BUFFER_ID_MASK))
+#define SQ_VTX_WORD0_SET_SRC_GPR(sq_vtx_word0_reg, src_gpr) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_SRC_GPR_MASK) | \
+      (((unsigned int)(src_gpr) << SQ_VTX_WORD0_SRC_GPR_SHIFT) & SQ_VTX_WORD0_SRC_GPR_MASK))
+#define SQ_VTX_WORD0_SET_SRC_REL(sq_vtx_word0_reg, src_rel) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_SRC_REL_MASK) | \
+      (((unsigned int)(src_rel) << SQ_VTX_WORD0_SRC_REL_SHIFT) & SQ_VTX_WORD0_SRC_REL_MASK))
+#define SQ_VTX_WORD0_SET_SRC_SEL_X(sq_vtx_word0_reg, src_sel_x) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_SRC_SEL_X_MASK) | \
+      (((unsigned int)(src_sel_x) << SQ_VTX_WORD0_SRC_SEL_X_SHIFT) & SQ_VTX_WORD0_SRC_SEL_X_MASK))
+#define SQ_VTX_WORD0_SET_MEGA_FETCH_COUNT(sq_vtx_word0_reg, mega_fetch_count) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) | \
+      (((unsigned int)(mega_fetch_count) << SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT) & SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word0_t {
+          unsigned int vtx_inst                       : SQ_VTX_WORD0_VTX_INST_SIZE;
+          unsigned int fetch_type                     : SQ_VTX_WORD0_FETCH_TYPE_SIZE;
+          unsigned int fetch_whole_quad               : SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+          unsigned int buffer_id                      : SQ_VTX_WORD0_BUFFER_ID_SIZE;
+          unsigned int src_gpr                        : SQ_VTX_WORD0_SRC_GPR_SIZE;
+          unsigned int src_rel                        : SQ_VTX_WORD0_SRC_REL_SIZE;
+          unsigned int src_sel_x                      : SQ_VTX_WORD0_SRC_SEL_X_SIZE;
+          unsigned int mega_fetch_count               : SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE;
+     } sq_vtx_word0_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word0_t {
+          unsigned int mega_fetch_count               : SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE;
+          unsigned int src_sel_x                      : SQ_VTX_WORD0_SRC_SEL_X_SIZE;
+          unsigned int src_rel                        : SQ_VTX_WORD0_SRC_REL_SIZE;
+          unsigned int src_gpr                        : SQ_VTX_WORD0_SRC_GPR_SIZE;
+          unsigned int buffer_id                      : SQ_VTX_WORD0_BUFFER_ID_SIZE;
+          unsigned int fetch_whole_quad               : SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+          unsigned int fetch_type                     : SQ_VTX_WORD0_FETCH_TYPE_SIZE;
+          unsigned int vtx_inst                       : SQ_VTX_WORD0_VTX_INST_SIZE;
+     } sq_vtx_word0_t;
+
+#endif
+
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word0_t f;
+} sq_vtx_word0_u;
+
+
+/*
+ * SQ_VTX_WORD1 struct
+ *
+ * Bit-field layout constants for the 32-bit SQ_VTX_WORD1 word.  For every
+ * field FOO three constants are provided:
+ *   SQ_VTX_WORD1_FOO_SIZE  - width of the field in bits
+ *   SQ_VTX_WORD1_FOO_SHIFT - bit position of the field's least-significant bit
+ *   SQ_VTX_WORD1_FOO_MASK  - in-place mask, ((1 << SIZE) - 1) << SHIFT
+ * The fields named here occupy bits 9-31; bits 0-8 have no field in this
+ * group.  NOTE(review): presumably the SQ_VTX_WORD1_GPR / SQ_VTX_WORD1_SEM
+ * definitions describe the low bits of the same word - confirm.
+ */
+
+#define SQ_VTX_WORD1_DST_SEL_X_SIZE    3
+#define SQ_VTX_WORD1_DST_SEL_Y_SIZE    3
+#define SQ_VTX_WORD1_DST_SEL_Z_SIZE    3
+#define SQ_VTX_WORD1_DST_SEL_W_SIZE    3
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE 1
+#define SQ_VTX_WORD1_DATA_FORMAT_SIZE  6
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE 2
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE 1
+#define SQ_VTX_WORD1_SRF_MODE_ALL_SIZE 1
+
+#define SQ_VTX_WORD1_DST_SEL_X_SHIFT   9
+#define SQ_VTX_WORD1_DST_SEL_Y_SHIFT   12
+#define SQ_VTX_WORD1_DST_SEL_Z_SHIFT   15
+#define SQ_VTX_WORD1_DST_SEL_W_SHIFT   18
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT 21
+#define SQ_VTX_WORD1_DATA_FORMAT_SHIFT 22
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT 28
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT 30
+#define SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT 31
+
+#define SQ_VTX_WORD1_DST_SEL_X_MASK    0x00000e00
+#define SQ_VTX_WORD1_DST_SEL_Y_MASK    0x00007000
+#define SQ_VTX_WORD1_DST_SEL_Z_MASK    0x00038000
+#define SQ_VTX_WORD1_DST_SEL_W_MASK    0x001c0000
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_MASK 0x00200000
+#define SQ_VTX_WORD1_DATA_FORMAT_MASK  0x0fc00000
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK 0x30000000
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK 0x40000000
+#define SQ_VTX_WORD1_SRF_MODE_ALL_MASK 0x80000000
+/* Union of all field masks (bits 9-31). */
+#define SQ_VTX_WORD1_MASK \
+     (SQ_VTX_WORD1_DST_SEL_X_MASK | \
+      SQ_VTX_WORD1_DST_SEL_Y_MASK | \
+      SQ_VTX_WORD1_DST_SEL_Z_MASK | \
+      SQ_VTX_WORD1_DST_SEL_W_MASK | \
+      SQ_VTX_WORD1_USE_CONST_FIELDS_MASK | \
+      SQ_VTX_WORD1_DATA_FORMAT_MASK | \
+      SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK | \
+      SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK | \
+      SQ_VTX_WORD1_SRF_MODE_ALL_MASK)
+/* Numerically 0xcdcdcdcd & SQ_VTX_WORD1_MASK.  NOTE(review): the intended use of this value is not visible here. */
+#define SQ_VTX_WORD1_DEFAULT           0xcdcdcc00
+
+/*
+ * Field extractors: isolate one field of a raw SQ_VTX_WORD1 value and shift
+ * it down so the field's least-significant bit lands at bit 0.
+ * The argument is parenthesized so that a compound expression such as
+ * "a | b" is masked as a whole; it is evaluated exactly once.
+ */
+#define SQ_VTX_WORD1_GET_DST_SEL_X(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DST_SEL_X_MASK) >> SQ_VTX_WORD1_DST_SEL_X_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_Y(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DST_SEL_Y_MASK) >> SQ_VTX_WORD1_DST_SEL_Y_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_Z(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DST_SEL_Z_MASK) >> SQ_VTX_WORD1_DST_SEL_Z_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_W(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DST_SEL_W_MASK) >> SQ_VTX_WORD1_DST_SEL_W_SHIFT)
+#define SQ_VTX_WORD1_GET_USE_CONST_FIELDS(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_USE_CONST_FIELDS_MASK) >> SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT)
+#define SQ_VTX_WORD1_GET_DATA_FORMAT(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DATA_FORMAT_MASK) >> SQ_VTX_WORD1_DATA_FORMAT_SHIFT)
+#define SQ_VTX_WORD1_GET_NUM_FORMAT_ALL(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK) >> SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT)
+#define SQ_VTX_WORD1_GET_FORMAT_COMP_ALL(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK) >> SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT)
+#define SQ_VTX_WORD1_GET_SRF_MODE_ALL(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_SRF_MODE_ALL_MASK) >> SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT)
+
+/*
+ * Field setters: replace one field of the lvalue <reg> with <value>, leaving
+ * every other bit untouched.  Each expands to an assignment expression.
+ *  - Both arguments are parenthesized so compound expressions keep their
+ *    meaning.
+ *  - The value is converted to unsigned int before shifting (left-shifting a
+ *    signed value into or past the sign bit is undefined behaviour) and is
+ *    then masked so an out-of-range value cannot spill into other fields.
+ *  - <reg> is evaluated twice; do not pass an expression with side effects.
+ */
+#define SQ_VTX_WORD1_SET_DST_SEL_X(sq_vtx_word1_reg, dst_sel_x) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DST_SEL_X_MASK) | \
+      (((unsigned int)(dst_sel_x) << SQ_VTX_WORD1_DST_SEL_X_SHIFT) & SQ_VTX_WORD1_DST_SEL_X_MASK))
+#define SQ_VTX_WORD1_SET_DST_SEL_Y(sq_vtx_word1_reg, dst_sel_y) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DST_SEL_Y_MASK) | \
+      (((unsigned int)(dst_sel_y) << SQ_VTX_WORD1_DST_SEL_Y_SHIFT) & SQ_VTX_WORD1_DST_SEL_Y_MASK))
+#define SQ_VTX_WORD1_SET_DST_SEL_Z(sq_vtx_word1_reg, dst_sel_z) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DST_SEL_Z_MASK) | \
+      (((unsigned int)(dst_sel_z) << SQ_VTX_WORD1_DST_SEL_Z_SHIFT) & SQ_VTX_WORD1_DST_SEL_Z_MASK))
+#define SQ_VTX_WORD1_SET_DST_SEL_W(sq_vtx_word1_reg, dst_sel_w) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DST_SEL_W_MASK) | \
+      (((unsigned int)(dst_sel_w) << SQ_VTX_WORD1_DST_SEL_W_SHIFT) & SQ_VTX_WORD1_DST_SEL_W_MASK))
+#define SQ_VTX_WORD1_SET_USE_CONST_FIELDS(sq_vtx_word1_reg, use_const_fields) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_USE_CONST_FIELDS_MASK) | \
+      (((unsigned int)(use_const_fields) << SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT) & SQ_VTX_WORD1_USE_CONST_FIELDS_MASK))
+#define SQ_VTX_WORD1_SET_DATA_FORMAT(sq_vtx_word1_reg, data_format) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DATA_FORMAT_MASK) | \
+      (((unsigned int)(data_format) << SQ_VTX_WORD1_DATA_FORMAT_SHIFT) & SQ_VTX_WORD1_DATA_FORMAT_MASK))
+#define SQ_VTX_WORD1_SET_NUM_FORMAT_ALL(sq_vtx_word1_reg, num_format_all) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK) | \
+      (((unsigned int)(num_format_all) << SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT) & SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK))
+#define SQ_VTX_WORD1_SET_FORMAT_COMP_ALL(sq_vtx_word1_reg, format_comp_all) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK) | \
+      (((unsigned int)(format_comp_all) << SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT) & SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK))
+#define SQ_VTX_WORD1_SET_SRF_MODE_ALL(sq_vtx_word1_reg, srf_mode_all) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_SRF_MODE_ALL_MASK) | \
+      (((unsigned int)(srf_mode_all) << SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT) & SQ_VTX_WORD1_SRF_MODE_ALL_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_t {
+          unsigned int                                : 9;
+          unsigned int dst_sel_x                      : SQ_VTX_WORD1_DST_SEL_X_SIZE;
+          unsigned int dst_sel_y                      : SQ_VTX_WORD1_DST_SEL_Y_SIZE;
+          unsigned int dst_sel_z                      : SQ_VTX_WORD1_DST_SEL_Z_SIZE;
+          unsigned int dst_sel_w                      : SQ_VTX_WORD1_DST_SEL_W_SIZE;
+          unsigned int use_const_fields               : SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE;
+          unsigned int data_format                    : SQ_VTX_WORD1_DATA_FORMAT_SIZE;
+          unsigned int num_format_all                 : SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE;
+          unsigned int format_comp_all                : SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE;
+          unsigned int srf_mode_all                   : SQ_VTX_WORD1_SRF_MODE_ALL_SIZE;
+     } sq_vtx_word1_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_t {
+          unsigned int srf_mode_all                   : SQ_VTX_WORD1_SRF_MODE_ALL_SIZE;
+          unsigned int format_comp_all                : SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE;
+          unsigned int num_format_all                 : SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE;
+          unsigned int data_format                    : SQ_VTX_WORD1_DATA_FORMAT_SIZE;
+          unsigned int use_const_fields               : SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE;
+          unsigned int dst_sel_w                      : SQ_VTX_WORD1_DST_SEL_W_SIZE;
+          unsigned int dst_sel_z                      : SQ_VTX_WORD1_DST_SEL_Z_SIZE;
+          unsigned int dst_sel_y                      : SQ_VTX_WORD1_DST_SEL_Y_SIZE;
+          unsigned int dst_sel_x                      : SQ_VTX_WORD1_DST_SEL_X_SIZE;
+          unsigned int                                : 9;
+     } sq_vtx_word1_t;
+
+#endif
+
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word1_t f;
+} sq_vtx_word1_u;
+
+
+/*
+ * SQ_VTX_WORD1_GPR struct
+ *
+ * Bit-field layout constants for the SQ_VTX_WORD1_GPR view of a 32-bit
+ * word: *_SIZE is the field width in bits, *_SHIFT the bit position of the
+ * field's least-significant bit, *_MASK the in-place mask.  Only bits 0-7
+ * carry named fields.
+ */
+
+#define SQ_VTX_WORD1_GPR_DST_GPR_SIZE  7
+#define SQ_VTX_WORD1_GPR_DST_REL_SIZE  1
+
+#define SQ_VTX_WORD1_GPR_DST_GPR_SHIFT 0
+#define SQ_VTX_WORD1_GPR_DST_REL_SHIFT 7
+
+#define SQ_VTX_WORD1_GPR_DST_GPR_MASK  0x0000007f
+#define SQ_VTX_WORD1_GPR_DST_REL_MASK  0x00000080
+/* Union of all field masks (bits 0-7). */
+#define SQ_VTX_WORD1_GPR_MASK \
+     (SQ_VTX_WORD1_GPR_DST_GPR_MASK | \
+      SQ_VTX_WORD1_GPR_DST_REL_MASK)
+/* Numerically 0xcdcdcdcd & SQ_VTX_WORD1_GPR_MASK.  NOTE(review): the intended use of this value is not visible here. */
+#define SQ_VTX_WORD1_GPR_DEFAULT       0x000000cd
+
+/*
+ * Field extractors: isolate one field of a raw SQ_VTX_WORD1_GPR value and
+ * shift it down to bit 0.  The argument is parenthesized so that a compound
+ * expression such as "a | b" is masked as a whole.
+ */
+#define SQ_VTX_WORD1_GPR_GET_DST_GPR(sq_vtx_word1_gpr) \
+     (((sq_vtx_word1_gpr) & SQ_VTX_WORD1_GPR_DST_GPR_MASK) >> SQ_VTX_WORD1_GPR_DST_GPR_SHIFT)
+#define SQ_VTX_WORD1_GPR_GET_DST_REL(sq_vtx_word1_gpr) \
+     (((sq_vtx_word1_gpr) & SQ_VTX_WORD1_GPR_DST_REL_MASK) >> SQ_VTX_WORD1_GPR_DST_REL_SHIFT)
+
+/*
+ * Field setters: replace one field of the lvalue <reg> with <value>, leaving
+ * every other bit untouched.  Arguments are parenthesized; the value is
+ * converted to unsigned int before shifting and masked so it cannot spill
+ * into other fields.  <reg> is evaluated twice; avoid side effects in it.
+ */
+#define SQ_VTX_WORD1_GPR_SET_DST_GPR(sq_vtx_word1_gpr_reg, dst_gpr) \
+     ((sq_vtx_word1_gpr_reg) = ((sq_vtx_word1_gpr_reg) & ~SQ_VTX_WORD1_GPR_DST_GPR_MASK) | \
+      (((unsigned int)(dst_gpr) << SQ_VTX_WORD1_GPR_DST_GPR_SHIFT) & SQ_VTX_WORD1_GPR_DST_GPR_MASK))
+#define SQ_VTX_WORD1_GPR_SET_DST_REL(sq_vtx_word1_gpr_reg, dst_rel) \
+     ((sq_vtx_word1_gpr_reg) = ((sq_vtx_word1_gpr_reg) & ~SQ_VTX_WORD1_GPR_DST_REL_MASK) | \
+      (((unsigned int)(dst_rel) << SQ_VTX_WORD1_GPR_DST_REL_SHIFT) & SQ_VTX_WORD1_GPR_DST_REL_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_gpr_t {
+          unsigned int dst_gpr                        : SQ_VTX_WORD1_GPR_DST_GPR_SIZE;
+          unsigned int dst_rel                        : SQ_VTX_WORD1_GPR_DST_REL_SIZE;
+          unsigned int                                : 24;
+     } sq_vtx_word1_gpr_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_gpr_t {
+          unsigned int                                : 24;
+          unsigned int dst_rel                        : SQ_VTX_WORD1_GPR_DST_REL_SIZE;
+          unsigned int dst_gpr                        : SQ_VTX_WORD1_GPR_DST_GPR_SIZE;
+     } sq_vtx_word1_gpr_t;
+
+#endif
+
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word1_gpr_t f;
+} sq_vtx_word1_gpr_u;
+
+
+/*
+ * SQ_VTX_WORD1_SEM struct
+ *
+ * Bit-field layout constants for the SQ_VTX_WORD1_SEM view of a 32-bit
+ * word: *_SIZE is the field width in bits, *_SHIFT the bit position of the
+ * field's least-significant bit, *_MASK the in-place mask.  The single
+ * field occupies bits 0-7.
+ */
+
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE 8
+
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT 0
+
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK 0x000000ff
+/* Union of all field masks (bits 0-7). */
+#define SQ_VTX_WORD1_SEM_MASK \
+     (SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK)
+/* Numerically 0xcdcdcdcd & SQ_VTX_WORD1_SEM_MASK.  NOTE(review): the intended use of this value is not visible here. */
+#define SQ_VTX_WORD1_SEM_DEFAULT       0x000000cd
+
+/*
+ * Field extractor: isolate semantic_id from a raw SQ_VTX_WORD1_SEM value.
+ * The argument is parenthesized so that a compound expression such as
+ * "a | b" is masked as a whole.
+ */
+#define SQ_VTX_WORD1_SEM_GET_SEMANTIC_ID(sq_vtx_word1_sem) \
+     (((sq_vtx_word1_sem) & SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) >> SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT)
+
+/*
+ * Field setter: replace semantic_id in the lvalue <reg> with <value>, leaving
+ * every other bit untouched.  Arguments are parenthesized; the value is
+ * converted to unsigned int before shifting and masked so it cannot spill
+ * outside the field.  <reg> is evaluated twice; avoid side effects in it.
+ */
+#define SQ_VTX_WORD1_SEM_SET_SEMANTIC_ID(sq_vtx_word1_sem_reg, semantic_id) \
+     ((sq_vtx_word1_sem_reg) = ((sq_vtx_word1_sem_reg) & ~SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) | \
+      (((unsigned int)(semantic_id) << SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT) & SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_sem_t {
+          unsigned int semantic_id                    : SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE;
+          unsigned int                                : 24;
+     } sq_vtx_word1_sem_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_sem_t {
+          unsigned int                                : 24;
+          unsigned int semantic_id                    : SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE;
+     } sq_vtx_word1_sem_t;
+
+#endif
+
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word1_sem_t f;
+} sq_vtx_word1_sem_u;
+
+
+/*
+ * SQ_VTX_WORD2 struct
+ *
+ * Bit-field layout constants for the 32-bit SQ_VTX_WORD2 word.  For every
+ * field FOO three constants are provided:
+ *   SQ_VTX_WORD2_FOO_SIZE  - width of the field in bits
+ *   SQ_VTX_WORD2_FOO_SHIFT - bit position of the field's least-significant bit
+ *   SQ_VTX_WORD2_FOO_MASK  - in-place mask, ((1 << SIZE) - 1) << SHIFT
+ * The named fields occupy bits 0-20; bits 21-31 have no field.
+ */
+
+#define SQ_VTX_WORD2_OFFSET_SIZE       16
+#define SQ_VTX_WORD2_ENDIAN_SWAP_SIZE  2
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE 1
+#define SQ_VTX_WORD2_MEGA_FETCH_SIZE   1
+#define SQ_VTX_WORD2_ALT_CONST_SIZE    1
+
+#define SQ_VTX_WORD2_OFFSET_SHIFT      0
+#define SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT 16
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT 18
+#define SQ_VTX_WORD2_MEGA_FETCH_SHIFT  19
+#define SQ_VTX_WORD2_ALT_CONST_SHIFT   20
+
+#define SQ_VTX_WORD2_OFFSET_MASK       0x0000ffff
+#define SQ_VTX_WORD2_ENDIAN_SWAP_MASK  0x00030000
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK 0x00040000
+#define SQ_VTX_WORD2_MEGA_FETCH_MASK   0x00080000
+#define SQ_VTX_WORD2_ALT_CONST_MASK    0x00100000
+/* Union of all field masks (bits 0-20). */
+#define SQ_VTX_WORD2_MASK \
+     (SQ_VTX_WORD2_OFFSET_MASK | \
+      SQ_VTX_WORD2_ENDIAN_SWAP_MASK | \
+      SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK | \
+      SQ_VTX_WORD2_MEGA_FETCH_MASK | \
+      SQ_VTX_WORD2_ALT_CONST_MASK)
+/* Numerically 0xcdcdcdcd & SQ_VTX_WORD2_MASK.  NOTE(review): the intended use of this value is not visible here. */
+#define SQ_VTX_WORD2_DEFAULT           0x000dcdcd
+
+/*
+ * Field extractors: isolate one field of a raw SQ_VTX_WORD2 value and shift
+ * it down so the field's least-significant bit lands at bit 0.
+ * The argument is parenthesized so that a compound expression such as
+ * "a | b" is masked as a whole; it is evaluated exactly once.
+ */
+#define SQ_VTX_WORD2_GET_OFFSET(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_OFFSET_MASK) >> SQ_VTX_WORD2_OFFSET_SHIFT)
+#define SQ_VTX_WORD2_GET_ENDIAN_SWAP(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_ENDIAN_SWAP_MASK) >> SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT)
+#define SQ_VTX_WORD2_GET_CONST_BUF_NO_STRIDE(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK) >> SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT)
+#define SQ_VTX_WORD2_GET_MEGA_FETCH(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_MEGA_FETCH_MASK) >> SQ_VTX_WORD2_MEGA_FETCH_SHIFT)
+#define SQ_VTX_WORD2_GET_ALT_CONST(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_ALT_CONST_MASK) >> SQ_VTX_WORD2_ALT_CONST_SHIFT)
+
+/*
+ * Field setters: replace one field of the lvalue <reg> with <value>, leaving
+ * every other bit untouched.  Each expands to an assignment expression.
+ *  - Both arguments are parenthesized so compound expressions keep their
+ *    meaning.
+ *  - The value is converted to unsigned int before shifting and is then
+ *    masked so an out-of-range value cannot spill into other fields.
+ *  - <reg> is evaluated twice; do not pass an expression with side effects.
+ */
+#define SQ_VTX_WORD2_SET_OFFSET(sq_vtx_word2_reg, offset) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_OFFSET_MASK) | \
+      (((unsigned int)(offset) << SQ_VTX_WORD2_OFFSET_SHIFT) & SQ_VTX_WORD2_OFFSET_MASK))
+#define SQ_VTX_WORD2_SET_ENDIAN_SWAP(sq_vtx_word2_reg, endian_swap) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_ENDIAN_SWAP_MASK) | \
+      (((unsigned int)(endian_swap) << SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT) & SQ_VTX_WORD2_ENDIAN_SWAP_MASK))
+#define SQ_VTX_WORD2_SET_CONST_BUF_NO_STRIDE(sq_vtx_word2_reg, const_buf_no_stride) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK) | \
+      (((unsigned int)(const_buf_no_stride) << SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT) & SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK))
+#define SQ_VTX_WORD2_SET_MEGA_FETCH(sq_vtx_word2_reg, mega_fetch) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_MEGA_FETCH_MASK) | \
+      (((unsigned int)(mega_fetch) << SQ_VTX_WORD2_MEGA_FETCH_SHIFT) & SQ_VTX_WORD2_MEGA_FETCH_MASK))
+#define SQ_VTX_WORD2_SET_ALT_CONST(sq_vtx_word2_reg, alt_const) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_ALT_CONST_MASK) | \
+      (((unsigned int)(alt_const) << SQ_VTX_WORD2_ALT_CONST_SHIFT) & SQ_VTX_WORD2_ALT_CONST_MASK))
+
+#if            defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word2_t {
+          unsigned int offset                         : SQ_VTX_WORD2_OFFSET_SIZE;
+          unsigned int endian_swap                    : SQ_VTX_WORD2_ENDIAN_SWAP_SIZE;
+          unsigned int const_buf_no_stride            : SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE;
+          unsigned int mega_fetch                     : SQ_VTX_WORD2_MEGA_FETCH_SIZE;
+          unsigned int alt_const                      : SQ_VTX_WORD2_ALT_CONST_SIZE;
+          unsigned int                                : 11;
+     } sq_vtx_word2_t;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word2_t {
+          unsigned int                                : 11;
+          unsigned int alt_const                      : SQ_VTX_WORD2_ALT_CONST_SIZE;
+          unsigned int mega_fetch                     : SQ_VTX_WORD2_MEGA_FETCH_SIZE;
+          unsigned int const_buf_no_stride            : SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE;
+          unsigned int endian_swap                    : SQ_VTX_WORD2_ENDIAN_SWAP_SIZE;
+          unsigned int offset                         : SQ_VTX_WORD2_OFFSET_SIZE;
+     } sq_vtx_word2_t;
+
+#endif
+
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word2_t f;
+} sq_vtx_word2_u;
+
+#endif /* _SQ_MICRO_REG_H */
+
+
index df0128c37829fe431a3fc5e588f4cff842ba04c3..b75db5b80cac3cf4d627a244b2cc1ddcc4115fc3 100644 (file)
@@ -61,8 +61,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_tex.h"
 #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
 #include "r600_context.h"
-#include "r600_fragprog.h"
-#include "r600_tex.h"
+#include "r700_fragprog.h"
+#include "r700_tex.h"
 #endif
 
 #include "utils.h"
@@ -405,13 +405,13 @@ static const __DRItexBufferExtension r300TexBufferExtension = {
 #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
 static const __DRItexOffsetExtension r600texOffsetExtension = {
     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
-   r600SetTexOffset,
+   r700SetTexOffset,
 };
 
 static const __DRItexBufferExtension r600TexBufferExtension = {
     { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
-   r600SetTexBuffer,
-   r600SetTexBuffer2,
+   r700SetTexBuffer,
+   r700SetTexBuffer2,
 };
 #endif