Improving Vladimir's alpha test fix a bit, as it turns out r300Enable didn't correctly...
[mesa.git] / src / mesa / drivers / dri / r300 / r300_cmdbuf.c
index 580c5145a72bd8c91422e14336a1cc68607eb83c..08551b0f5f65fc94d148beda384e9da6e08cf93d 100644 (file)
@@ -46,8 +46,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_ioctl.h"
 #include "r300_context.h"
 #include "r300_ioctl.h"
+#include "radeon_reg.h"
 #include "r300_reg.h"
 #include "r300_cmdbuf.h"
+#include "r300_emit.h"
 
 
 // Set this to 1 for extremely verbose debugging of command buffers
@@ -57,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 /**
  * Send the current command buffer via ioctl to the hardware.
  */
-int r300FlushCmdBuf(r300ContextPtr r300, const char* caller)
+int r300FlushCmdBufLocked(r300ContextPtr r300, const char* caller)
 {
        int ret;
        int i;
@@ -79,8 +81,6 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char* caller)
                                        r300->cmdbuf.cmd_buf[i]);
        }
 
-       LOCK_HARDWARE(&r300->radeon);
-
        cmd.buf = (char*)(r300->cmdbuf.cmd_buf + start);
        cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
 
@@ -92,30 +92,41 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char* caller)
                cmd.boxes = (drm_clip_rect_t *)r300->radeon.pClipRects;
        }
 
-       if (cmd.nbox) {
-               ret = drmCommandWrite(r300->radeon.dri.fd,
-                               DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
-               if (ret) {
-                       UNLOCK_HARDWARE(&r300->radeon);
-                       fprintf(stderr, "drmCommandWrite: %d\n", ret);
-                       exit(-1);
-               }
+       ret = drmCommandWrite(r300->radeon.dri.fd,
+                       DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
 
-               if (RADEON_DEBUG & DEBUG_SYNC) {
-                       fprintf(stderr, "Syncing in %s\n\n", __FUNCTION__);
-                       radeonWaitForIdleLocked(&r300->radeon);
-               }
-       } else {
-               if (RADEON_DEBUG & DEBUG_IOCTL)
-                       fprintf(stderr, "%s: No cliprects\n", __FUNCTION__);
+       if (RADEON_DEBUG & DEBUG_SYNC) {
+               fprintf(stderr, "Syncing in %s (from %s)\n\n", __FUNCTION__, caller);
+               radeonWaitForIdleLocked(&r300->radeon);
        }
 
-       UNLOCK_HARDWARE(&r300->radeon);
-
+       r300->dma.nr_released_bufs = 0;
        r300->cmdbuf.count_used = 0;
        r300->cmdbuf.count_reemit = 0;
 
-       return 0;
+       return ret;
+}
+
+
+/**
+ * Flush the current command buffer, holding the hardware lock for the
+ * duration of the submission.
+ *
+ * Wraps r300FlushCmdBufLocked() in LOCK_HARDWARE / UNLOCK_HARDWARE.  If the
+ * locked flush reports a non-zero result, a message is written to stderr and
+ * the process exits with that value, so control only returns to the caller
+ * when the result is 0.
+ *
+ * r300   - context whose command buffer is flushed
+ * caller - name of the calling function, passed through to the locked flush
+ */
+int r300FlushCmdBuf(r300ContextPtr r300, const char* caller)
+{
+       int ret;
+
+       LOCK_HARDWARE(&r300->radeon);
+
+       ret = r300FlushCmdBufLocked(r300, caller);
+
+       /* Drop the lock before reporting, so we never exit while holding it. */
+       UNLOCK_HARDWARE(&r300->radeon);
+
+       if (ret) {
+               fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret);
+               exit(ret);
+       }
+
+       return ret;
 }
 
 
@@ -172,7 +183,6 @@ static __inline__ void r300DoEmitState(r300ContextPtr r300, GLboolean dirty)
        }
 }
 
-
 /**
  * Copy dirty hardware state atoms into the command buffer.
  *
@@ -186,7 +196,7 @@ void r300EmitState(r300ContextPtr r300)
 
        if (r300->cmdbuf.count_used && !r300->hw.is_dirty && !r300->hw.all_dirty)
                return;
-
+       
        /* To avoid going across the entire set of states multiple times, just check
         * for enough space for the case of emitting all state, and inline the
         * r300AllocCmdBuf code here without all the checks.
@@ -212,6 +222,7 @@ void r300EmitState(r300ContextPtr r300)
        r300->hw.all_dirty = GL_FALSE;
 }
 
+#if 0
 
 static __inline__ uint32_t cmducs(int reg, int count)
 {
@@ -236,6 +247,7 @@ static __inline__ uint32_t cmdvpu(int addr, int count)
 
        return cmd.u;
 }
+#endif
 
 #define CHECK( NM, COUNT )                             \
 static int check_##NM( r300ContextPtr r300,            \
@@ -257,13 +269,13 @@ CHECK( vpu, vpucount(atom->cmd) ? (1 + vpucount(atom->cmd)*4) : 0 )
 
 #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )                          \
    do {                                                                        \
-      r300->hw.ATOM.cmd_size = SZ;                                     \
-      r300->hw.ATOM.cmd = (uint32_t*)CALLOC(SZ * sizeof(uint32_t));    \
-      r300->hw.ATOM.name = NM;                                         \
-      r300->hw.ATOM.idx = IDX;                                         \
+      r300->hw.ATOM.cmd_size = (SZ);                                   \
+      r300->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t));  \
+      r300->hw.ATOM.name = (NM);                                       \
+      r300->hw.ATOM.idx = (IDX);                                       \
       r300->hw.ATOM.check = check_##CHK;                               \
       r300->hw.ATOM.dirty = GL_FALSE;                                  \
-      r300->hw.max_state_size += SZ;                                   \
+      r300->hw.max_state_size += (SZ);                                 \
    } while (0)
 
 
@@ -273,19 +285,20 @@ CHECK( vpu, vpucount(atom->cmd) ? (1 + vpucount(atom->cmd)*4) : 0 )
  */
 void r300InitCmdBuf(r300ContextPtr r300)
 {
-       int size;
-
+       int size, i, mtu;
+       
        r300->hw.max_state_size = 0;
 
+       mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+       fprintf(stderr, "Using %d maximum texture units..\n", mtu);
+
        /* Initialize state atoms */
        ALLOC_STATE( vpt, always, R300_VPT_CMDSIZE, "vpt", 0 );
                r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmducs(R300_SE_VPORT_XSCALE, 6);
        ALLOC_STATE( unk2080, always, 2, "unk2080", 0 );
                r300->hw.unk2080.cmd[0] = cmducs(0x2080, 1);
-       ALLOC_STATE( ovf, always, R300_OVF_CMDSIZE, "ovf", 0 );
-               r300->hw.ovf.cmd[R300_OVF_CMD_0] = cmducs(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-       ALLOC_STATE( unk20B0, always, 3, "unk20B0", 0 );
-               r300->hw.unk20B0.cmd[0] = cmducs(0x20B0, 2);
+       ALLOC_STATE( vte, always, 3, "vte", 0 );
+               r300->hw.vte.cmd[0] = cmducs(R300_SE_VTE_CNTL, 2);
        ALLOC_STATE( unk2134, always, 3, "unk2134", 0 );
                r300->hw.unk2134.cmd[0] = cmducs(0x2134, 2);
        ALLOC_STATE( unk2140, always, 2, "unk2140", 0 );
@@ -304,12 +317,14 @@ void r300InitCmdBuf(r300ContextPtr r300)
                r300->hw.unk2220.cmd[0] = cmducs(0x2220, 4);
        ALLOC_STATE( unk2288, always, 2, "unk2288", 0 );
                r300->hw.unk2288.cmd[0] = cmducs(0x2288, 1);
+       ALLOC_STATE( vof, always, R300_VOF_CMDSIZE, "vof", 0 );
+               r300->hw.vof.cmd[R300_VOF_CMD_0] = cmducs(R300_VAP_OUTPUT_VTX_FMT_0, 2);
        ALLOC_STATE( pvs, always, R300_PVS_CMDSIZE, "pvs", 0 );
                r300->hw.pvs.cmd[R300_PVS_CMD_0] = cmducs(R300_VAP_PVS_CNTL_1, 3);
-       ALLOC_STATE( unk4008, always, 2, "unk4008", 0 );
-               r300->hw.unk4008.cmd[0] = cmducs(0x4008, 1);
-       ALLOC_STATE( unk4010, always, 6, "unk4010", 0 );
-               r300->hw.unk4010.cmd[0] = cmducs(0x4010, 5);
+       ALLOC_STATE( gb_enable, always, 2, "gb_enable", 0 );
+               r300->hw.gb_enable.cmd[0] = cmducs(R300_GB_ENABLE, 1);
+       ALLOC_STATE( gb_misc, always, R300_GB_MISC_CMDSIZE, "gb_misc", 0 );
+               r300->hw.gb_misc.cmd[0] = cmducs(R300_GB_MSPOS0, 5);
        ALLOC_STATE( txe, always, R300_TXE_CMDSIZE, "txe", 0 );
                r300->hw.txe.cmd[R300_TXE_CMD_0] = cmducs(R300_TX_ENABLE, 1);
        ALLOC_STATE( unk4200, always, 5, "unk4200", 0 );
@@ -320,6 +335,12 @@ void r300InitCmdBuf(r300ContextPtr r300)
                r300->hw.ps.cmd[0] = cmducs(R300_RE_POINTSIZE, 1);
        ALLOC_STATE( unk4230, always, 4, "unk4230", 0 );
                r300->hw.unk4230.cmd[0] = cmducs(0x4230, 3);
+       ALLOC_STATE( lcntl, always, 2, "lcntl", 0 );
+               r300->hw.lcntl.cmd[0] = cmducs(R300_RE_LINE_CNT, 1);
+#ifdef EXP_C
+       ALLOC_STATE( lsf, always, 2, "lsf", 0 );
+               r300->hw.lsf.cmd[0] = cmducs(R300_RE_LINE_STIPPLE_FACTOR, 1);
+#endif
        ALLOC_STATE( unk4260, always, 4, "unk4260", 0 );
                r300->hw.unk4260.cmd[0] = cmducs(0x4260, 3);
        ALLOC_STATE( unk4274, always, 5, "unk4274", 0 );
@@ -328,6 +349,8 @@ void r300InitCmdBuf(r300ContextPtr r300)
                r300->hw.unk4288.cmd[0] = cmducs(0x4288, 5);
        ALLOC_STATE( unk42A0, always, 2, "unk42A0", 0 );
                r300->hw.unk42A0.cmd[0] = cmducs(0x42A0, 1);
+       ALLOC_STATE( zbs, always, R300_ZBS_CMDSIZE, "zbs", 0 );
+               r300->hw.zbs.cmd[R300_ZBS_CMD_0] = cmducs(R300_RE_ZBIAS_T_FACTOR, 4);
        ALLOC_STATE( unk42B4, always, 2, "unk42B4", 0 );
                r300->hw.unk42B4.cmd[0] = cmducs(0x42B4, 1);
        ALLOC_STATE( cul, always, R300_CUL_CMDSIZE, "cul", 0 );
@@ -347,6 +370,8 @@ void r300InitCmdBuf(r300ContextPtr r300)
        ALLOC_STATE( fp, always, R300_FP_CMDSIZE, "fp", 0 );
                r300->hw.fp.cmd[R300_FP_CMD_0] = cmducs(R300_PFS_CNTL_0, 3);
                r300->hw.fp.cmd[R300_FP_CMD_1] = cmducs(R300_PFS_NODE_0, 4);
+       ALLOC_STATE( fpt, variable, R300_FPT_CMDSIZE, "fpt", 0 );
+               r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmducs(R300_PFS_TEXI_0, 0);
        ALLOC_STATE( unk46A4, always, 6, "unk46A4", 0 );
                r300->hw.unk46A4.cmd[0] = cmducs(0x46A4, 5);
        ALLOC_STATE( fpi[0], variable, R300_FPI_CMDSIZE, "fpi/0", 0 );
@@ -362,9 +387,11 @@ void r300InitCmdBuf(r300ContextPtr r300)
        ALLOC_STATE( unk4BC8, always, 4, "unk4BC8", 0 );
                r300->hw.unk4BC8.cmd[0] = cmducs(0x4BC8, 3);
        ALLOC_STATE( at, always, R300_AT_CMDSIZE, "at", 0 );
-               r300->hw.at.cmd[R300_AT_CMD_0] = cmducs(R300_PP_ALPHA_TEST, 1);
+               r300->hw.at.cmd[R300_AT_CMD_0] = cmducs(R300_PP_ALPHA_TEST, 2);
        ALLOC_STATE( unk4BD8, always, 2, "unk4BD8", 0 );
                r300->hw.unk4BD8.cmd[0] = cmducs(0x4BD8, 1);
+       ALLOC_STATE( fpp, variable, R300_FPP_CMDSIZE, "fpp", 0 );
+               r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmducs(R300_PFS_PARAM_0_X, 0);
        ALLOC_STATE( unk4E00, always, 2, "unk4E00", 0 );
                r300->hw.unk4E00.cmd[0] = cmducs(0x4E00, 1);
        ALLOC_STATE( bld, always, R300_BLD_CMDSIZE, "bld", 0 );
@@ -380,10 +407,10 @@ void r300InitCmdBuf(r300ContextPtr r300)
                r300->hw.unk4E50.cmd[0] = cmducs(0x4E50, 9);
        ALLOC_STATE( unk4E88, always, 2, "unk4E88", 0 );
                r300->hw.unk4E88.cmd[0] = cmducs(0x4E88, 1);
-       ALLOC_STATE( zc, always, R300_ZC_CMDSIZE, "zc", 0 );
-               r300->hw.zc.cmd[R300_ZC_CMD_0] = cmducs(R300_RB3D_ZCNTL_0, 2);
-       ALLOC_STATE( unk4F08, always, 2, "unk4F08", 0 );
-               r300->hw.unk4F08.cmd[0] = cmducs(0x4F08, 1);
+       ALLOC_STATE( unk4EA0, always, 3, "unk4EA0 R350 only", 0 );
+               r300->hw.unk4EA0.cmd[0] = cmducs(0x4EA0, 2);
+       ALLOC_STATE( zs, always, R300_ZS_CMDSIZE, "zstencil", 0 );
+               r300->hw.zs.cmd[R300_ZS_CMD_0] = cmducs(R300_RB3D_ZSTENCIL_CNTL_0, 3);
        ALLOC_STATE( unk4F10, always, 5, "unk4F10", 0 );
                r300->hw.unk4F10.cmd[0] = cmducs(0x4F10, 4);
        ALLOC_STATE( zb, always, R300_ZB_CMDSIZE, "zb", 0 );
@@ -404,14 +431,36 @@ void r300InitCmdBuf(r300ContextPtr r300)
        ALLOC_STATE( vps, vpu, R300_VPS_CMDSIZE, "vps", 0 );
                r300->hw.vps.cmd[R300_VPS_CMD_0] = cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1);
 
+       /* Textures */
+       ALLOC_STATE( tex.filter, variable, mtu+1, "tex_filter", 0 );
+               r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_FILTER_0, 0);
+
+       ALLOC_STATE( tex.unknown1, variable, mtu+1, "tex_unknown1", 0 );
+               r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_UNK1_0, 0);
+
+       ALLOC_STATE( tex.size, variable, mtu+1, "tex_size", 0 );
+               r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_SIZE_0, 0);
+
+       ALLOC_STATE( tex.format, variable, mtu+1, "tex_format", 0 );
+               r300->hw.tex.format.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_FORMAT_0, 0);
+
+       ALLOC_STATE( tex.offset, variable, mtu+1, "tex_offset", 0 );
+               r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_OFFSET_0, 0);
+
+       ALLOC_STATE( tex.unknown4, variable, mtu+1, "tex_unknown4", 0 );
+               r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_UNK4_0, 0);
+
+       ALLOC_STATE( tex.border_color, variable, mtu+1, "tex_border_color", 0 );
+               r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmducs(R300_TX_BORDER_COLOR_0, 0);
+
+
        /* Setup the atom linked list */
        make_empty_list(&r300->hw.atomlist);
        r300->hw.atomlist.name = "atom-list";
 
        insert_at_tail(&r300->hw.atomlist, &r300->hw.vpt);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2080);
-       insert_at_tail(&r300->hw.atomlist, &r300->hw.ovf);
-       insert_at_tail(&r300->hw.atomlist, &r300->hw.unk20B0);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.vte);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2134);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2140);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[0]);
@@ -421,18 +470,24 @@ void r300InitCmdBuf(r300ContextPtr r300)
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk221C);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2220);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2288);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.vof);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.pvs);
-       insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4008);
-       insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4010);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.gb_enable);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.gb_misc);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.txe);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4200);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4214);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.ps);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4230);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.lcntl);
+#ifdef EXP_C
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.lsf);
+#endif
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4260);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4274);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4288);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42A0);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.zbs);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42B4);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.cul);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42C0);
@@ -442,6 +497,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk43A4);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk43E8);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.fp);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.fpt);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk46A4);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.fpi[0]);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.fpi[1]);
@@ -451,6 +507,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4BC8);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.at);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4BD8);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.fpp);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E00);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.bld);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.cmk);
@@ -458,8 +515,8 @@ void r300InitCmdBuf(r300ContextPtr r300)
        insert_at_tail(&r300->hw.atomlist, &r300->hw.cb);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E50);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E88);
-       insert_at_tail(&r300->hw.atomlist, &r300->hw.zc);
-       insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F08);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4EA0);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.zs);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F10);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.zb);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F28);
@@ -471,18 +528,32 @@ void r300InitCmdBuf(r300ContextPtr r300)
        insert_at_tail(&r300->hw.atomlist, &r300->hw.vpp);
        insert_at_tail(&r300->hw.atomlist, &r300->hw.vps);
 
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.filter);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown1);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.size);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.format);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.offset);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown4);
+       insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.border_color);
+
        r300->hw.is_dirty = GL_TRUE;
        r300->hw.all_dirty = GL_TRUE;
 
        /* Initialize command buffer */
        size = 256 * driQueryOptioni(&r300->radeon.optionCache, "command_buffer_size");
-       if (size < 2*r300->hw.max_state_size)
-               size = 2*r300->hw.max_state_size;
+       if (size < 2*r300->hw.max_state_size){
+               size = 2*r300->hw.max_state_size+65535;
+               }
 
-       if (RADEON_DEBUG & DEBUG_IOCTL)
+       if (1 || RADEON_DEBUG & DEBUG_IOCTL){
+               fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%d\n",
+                       sizeof(drm_r300_cmd_header_t));
+               fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%d\n",
+                       sizeof(drm_radeon_cmd_buffer_t));
                fprintf(stderr,
                        "Allocating %d bytes command buffer (max state is %d bytes)\n",
                        size*4, r300->hw.max_state_size*4);
+               }
 
        r300->cmdbuf.size = size;
        r300->cmdbuf.cmd_buf = (uint32_t*)CALLOC(size*4);
@@ -505,3 +576,94 @@ void r300DestroyCmdBuf(r300ContextPtr r300)
        }
 }
 
+/**
+ * Append a BITBLT_MULTI packet to the command buffer describing a blit of a
+ * w x h rectangle from (srcx, srcy) to (dstx, dsty).
+ *
+ * Both pitches must be multiples of 64 and both offsets multiples of 1024;
+ * w and h must each be below 65536 (all asserted below).  Each surface is
+ * encoded in one dword as (pitch / 64) << 22 | (offset >> 10).
+ * color_fmt is placed at bit 8 of the GMC control dword.
+ */
+void r300EmitBlit(r300ContextPtr rmesa,
+                 GLuint color_fmt,
+                 GLuint src_pitch,
+                 GLuint src_offset,
+                 GLuint dst_pitch,
+                 GLuint dst_offset,
+                 GLint srcx, GLint srcy,
+                 GLint dstx, GLint dsty, GLuint w, GLuint h)
+{
+       drm_radeon_cmd_header_t *cmd;
+
+       if (RADEON_DEBUG & DEBUG_IOCTL)
+               fprintf(stderr,
+                       "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+                       __FUNCTION__, src_pitch, src_offset, srcx, srcy,
+                       dst_pitch, dst_offset, dstx, dsty, w, h);
+
+       assert((src_pitch & 63) == 0);
+       assert((dst_pitch & 63) == 0);
+       assert((src_offset & 1023) == 0);
+       assert((dst_offset & 1023) == 0);
+       assert(w < (1 << 16));
+       assert(h < (1 << 16));
+
+       /* NOTE(review): the size is given as 8 * sizeof(int); confirm whether
+        * r300AllocCmdBuf expects bytes or dwords. */
+       cmd =
+           (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 8 * sizeof(int),
+                                                       __FUNCTION__);
+
+       /* Clear the whole header dword before setting cmd_type (r300EmitWait
+        * does the same), so the remaining header bytes are well defined. */
+       cmd[0].i = 0;
+       cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+       /* Packet header: BITBLT_MULTI with a count field of 5. */
+       cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16);
+       cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+                   RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+                   RADEON_GMC_BRUSH_NONE |
+                   (color_fmt << 8) |
+                   RADEON_GMC_SRC_DATATYPE_COLOR |
+                   RADEON_ROP3_S |
+                   RADEON_DP_SRC_SOURCE_MEMORY |
+                   RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
+
+       cmd[3].i = ((src_pitch / 64) << 22) | (src_offset >> 10);
+       cmd[4].i = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
+       cmd[5].i = (srcx << 16) | srcy;
+       cmd[6].i = (dstx << 16) | dsty; /* dst */
+       cmd[7].i = (w << 16) | h;
+}
+
+/**
+ * Append a single R300_CMD_WAIT header carrying the given flags.
+ *
+ * Nothing is emitted when the DRM minor version is below 6.
+ * flags may only contain RADEON_WAIT_2D and/or RADEON_WAIT_3D (asserted).
+ */
+void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
+{
+       drm_radeon_cmd_header_t *wait_cmd;
+
+       if (rmesa->radeon.dri.drmMinor < 6)
+               return;
+
+       assert(!(flags & ~(RADEON_WAIT_2D | RADEON_WAIT_3D)));
+
+       wait_cmd = (drm_radeon_cmd_header_t *)
+               r300AllocCmdBuf(rmesa, 1 * sizeof(int), __FUNCTION__);
+
+       /* Zero the whole dword first, then fill in the wait fields. */
+       wait_cmd[0].i = 0;
+       wait_cmd[0].wait.cmd_type = R300_CMD_WAIT;
+       wait_cmd[0].wait.flags = flags;
+}
+
+/**
+ * Emit a 3D_LOAD_VBPNTR packet describing the first nr entries of
+ * rmesa->state.aos[].
+ *
+ * Entries are written two at a time: one dword packing both sizes and
+ * strides, then one address dword per entry.  An odd trailing entry gets
+ * its own size/stride dword and address dword.  Each address is the entry's
+ * aos_offset advanced by offset * 4 * aos_stride.
+ */
+void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
+{
+       if (RADEON_DEBUG & DEBUG_VERTS) {
+               fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __func__, nr, offset);
+       }
+
+       /* One count dword, three dwords per pair, two for an odd last entry. */
+       int sz = 1 + (nr / 2) * 3 + (nr % 2) * 2;
+       int i = 0;
+       LOCAL_VARS
+
+       start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz-1);
+       e32(nr);
+
+       while (i + 1 < nr) {
+               e32(   rmesa->state.aos[i].aos_size
+                    | (rmesa->state.aos[i].aos_stride << 8)
+                    | (rmesa->state.aos[i+1].aos_size << 16)
+                    | (rmesa->state.aos[i+1].aos_stride << 24));
+               e32(rmesa->state.aos[i].aos_offset
+                   + offset*4*rmesa->state.aos[i].aos_stride);
+               e32(rmesa->state.aos[i+1].aos_offset
+                   + offset*4*rmesa->state.aos[i+1].aos_stride);
+               i += 2;
+       }
+
+       if (nr % 2) {
+               e32(   rmesa->state.aos[nr-1].aos_size
+                    | (rmesa->state.aos[nr-1].aos_stride << 8));
+               e32(rmesa->state.aos[nr-1].aos_offset
+                   + offset*4*rmesa->state.aos[nr-1].aos_stride);
+       }
+}
+