Port of r200 VB code. VB path should be as fast as the immediate path now, tuxracer...
author Ben Skeggs <darktama@iinet.net.au>
Fri, 11 Feb 2005 05:59:13 +0000 (05:59 +0000)
committer Ben Skeggs <darktama@iinet.net.au>
Fri, 11 Feb 2005 05:59:13 +0000 (05:59 +0000)
src/mesa/drivers/dri/r300/Makefile
src/mesa/drivers/dri/r300/r300_cmdbuf.c
src/mesa/drivers/dri/r300/r300_cmdbuf.h
src/mesa/drivers/dri/r300/r300_context.h
src/mesa/drivers/dri/r300/r300_ioctl.c
src/mesa/drivers/dri/r300/r300_maos.c [new file with mode: 0644]
src/mesa/drivers/dri/r300/r300_maos.h [new file with mode: 0644]
src/mesa/drivers/dri/r300/r300_render.c

index f76ffd974de3d3a6ed39af1a52f3b455a8b1e499..1e7a08452cb0b085b6b31db8f88a4199bf0a8598 100644 (file)
@@ -36,6 +36,7 @@ DRIVER_SOURCES = \
                 r300_tex.c \
                 r300_texstate.c \
                 r300_vertexprog.c \
+                r300_maos.c \
                 \
                 r200_context.c \
                 r200_ioctl.c \
index 99c1f23dfdd16f4fedb3926901d9b4c0a59141fb..0434421a492235966feeb690ff253cc32bc3db50 100644 (file)
@@ -645,36 +645,31 @@ void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
        }
 }
 
-void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start)
+/**
+ * Emit a 3D_LOAD_VBPNTR packet describing the first nr entries of
+ * rmesa->state.aos[].
+ *
+ * rmesa:  context whose state.aos[] holds the array descriptions
+ * nr:     number of arrays to describe
+ * offset: first vertex to fetch; every array address is advanced by
+ *         offset * 4 * aos_stride (aos_stride is counted in dwords)
+ */
+void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
 {
-int i, a, count;
-GLuint dw;
-LOCAL_VARS
-
-count=rmesa->state.aos_count;
-
-a=1+(count>>1)*3+(count & 1)*2;
-start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, a-1);
-e32(count);
-for(i=0;i+1<count;i+=2){
-       e32(  (rmesa->state.aos[i].element_size << 0) 
-            |(rmesa->state.aos[i].stride << 8)
-            |(rmesa->state.aos[i+1].element_size << 16)
-            |(rmesa->state.aos[i+1].stride << 24)
-           );
-       e32(rmesa->state.aos[i].offset+start*4*rmesa->state.aos[i].stride);
-       e32(rmesa->state.aos[i+1].offset+start*4*rmesa->state.aos[i+1].stride);
-       }
-if(count & 1){
-       e32(  (rmesa->state.aos[count-1].element_size << 0) 
-            |(rmesa->state.aos[count-1].stride << 8)
-           );
-       e32(rmesa->state.aos[count-1].offset+start*4*rmesa->state.aos[count-1].stride); 
-       }
+    /* Payload size in dwords: one for the array count, three per pair of
+     * arrays, and two more when an unpaired array is left over. */
+    int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+    GLuint i;
+    LOCAL_VARS
+
+    /* Declarations come first; nr is unsigned, so print it with %u. */
+    if (RADEON_DEBUG & DEBUG_VERTS)
+        fprintf(stderr, "%s: nr=%u, ofs=0x%08x\n", __func__, nr, offset);
+
+    start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz-1);
+    e32(nr);
+
+    /* Arrays are described two at a time: one dword carrying both
+     * size/stride pairs, followed by the two start addresses. */
+    for (i = 0; i + 1 < nr; i += 2) {
+        e32(  (rmesa->state.aos[i].aos_size << 0)
+             |(rmesa->state.aos[i].aos_stride << 8)
+             |(rmesa->state.aos[i+1].aos_size << 16)
+             |(rmesa->state.aos[i+1].aos_stride << 24)
+        );
+        e32(rmesa->state.aos[i].aos_offset+offset*4*rmesa->state.aos[i].aos_stride);
+        e32(rmesa->state.aos[i+1].aos_offset+offset*4*rmesa->state.aos[i+1].aos_stride);
+    }
+
+    /* Odd count: the last array gets a descriptor dword of its own. */
+    if (nr & 1) {
+        e32(  (rmesa->state.aos[nr-1].aos_size << 0)
+             |(rmesa->state.aos[nr-1].aos_stride << 8)
+        );
+        e32(rmesa->state.aos[nr-1].aos_offset+offset*4*rmesa->state.aos[nr-1].aos_stride);
+    }
 
-/* delay ? */
-#if 0
-e32(RADEON_CP_PACKET2);
-e32(RADEON_CP_PACKET2);
-#endif
 }
+
index 7f30f15562d8106e4dbaf727b73974994e586fd1..b471f2ac463c998de3a6ae9d0ed4a60ed3cf59b3 100644 (file)
@@ -46,6 +46,9 @@ extern void r300EmitState(r300ContextPtr r300);
 extern void r300InitCmdBuf(r300ContextPtr r300);
 extern void r300DestroyCmdBuf(r300ContextPtr r300);
 
+extern void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset);
+
+
 /**
  * Make sure that enough space is available in the command buffer
  * by flushing if necessary.
index bd15d76a5b430da26227ecc0a2585588933c1471..b6a80ae005f190016eee21200a6782225e9caef8 100644 (file)
@@ -110,9 +110,20 @@ struct r300_dma_region {
        struct r300_dma_buffer *buf;
        char *address;          /* == buf->address */
        int start, end, ptr;    /* offsets from start of buf */
-       int aos_start;
-       int aos_stride;
-       int aos_size;
+
+    int aos_offset;     /* address in GART memory */
+    int aos_stride;     /* distance between elements, in dwords */
+    int aos_size;       /* number of components (1-4) */
+    int aos_format;     /* format of components */
+    int aos_reg;        /* VAP register assignment */
+
+/* Left here so immediate mode still works unmodified */
+       int element_size;
+       int stride;
+       int offset;
+       int ncomponents;
+       int reg;
+       int format;
 };
 
 struct r300_dma {
@@ -653,8 +664,12 @@ struct r300_state {
        struct r300_vap_reg_state vap_reg;
        struct r300_vertex_shader_state vertex_shader;
        struct r300_pixel_shader_state pixel_shader;
-       struct r300_aos_rec aos[R300_MAX_AOS_ARRAYS];
+
+       struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
        int aos_count;
+
+       GLuint *Elts;
+
        GLuint render_inputs; /* actual render inputs that R300 was configured for. 
                                 They are the same as tnl->render_inputs for fixed pipeline */
 
index 6ba5bf257aa4e86d6062bfd5cea84cae3aa3dee3..8b022067f268eca717045516e6c84e87152381ba 100644 (file)
@@ -453,13 +453,13 @@ void r300ReleaseDmaRegion(r300ContextPtr rmesa,
                if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
                        fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
                                region->buf->buf->idx);
-
                cmd =
                    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
-                                                               sizeof(*cmd),
+                                                               sizeof(*cmd) / 4,
                                                                __FUNCTION__);
-               cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+               cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
                cmd->dma.buf_idx = region->buf->buf->idx;
+               
                FREE(region->buf);
                rmesa->dma.nr_released_bufs++;
        }
diff --git a/src/mesa/drivers/dri/r300/r300_maos.c b/src/mesa/drivers/dri/r300/r300_maos.c
new file mode 100644 (file)
index 0000000..a6c15e5
--- /dev/null
@@ -0,0 +1,444 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "imports.h"
+#include "macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "r300_context.h"
+#include "radeon_ioctl.h"
+#include "r300_state.h"
+#include "r300_maos.h"
+
+#define DEBUG_ALL DEBUG_VERTS
+
+/* COPY_DWORDS(dst, src, nr): copy nr 32-bit words from src to dst.
+ *
+ * Both variants leave dst advanced past the words written (the asm one
+ * through its "=D" output operand, the portable one through dst += nr),
+ * so dst must be a modifiable pointer lvalue.  src is not updated.
+ *
+ * NOTE(review): the portable variant expands dst, src and nr without
+ * parentheses and declares a local named j; pass simple expressions and
+ * do not pass anything that refers to a caller variable called j.
+ */
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr )                                    \
+do {                                                                   \
+       int __tmp;                                                      \
+       __asm__ __volatile__( "rep ; movsl"                             \
+                             : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
+                             : "0" (nr),                               \
+                               "D" ((long)dst),                        \
+                               "S" ((long)src) );                      \
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr )            \
+do {                                           \
+   int j;                                      \
+   for ( j = 0 ; j < nr ; j++ )                        \
+      dst[j] = ((int *)src)[j];                        \
+   dst += nr;                                  \
+} while (0)
+#endif
+
+/* Pack count one-dword elements, read stride bytes apart from data,
+ * into the DMA region at rvb->address + rvb->start.  ctx is unused.
+ */
+static void emit_vec4(GLcontext * ctx,
+                     struct r300_dma_region *rvb,
+                     char *data, int stride, int count)
+{
+       int *out = (int *)(rvb->address + rvb->start);
+       int left;
+
+       if (RADEON_DEBUG & DEBUG_VERTS)
+               fprintf(stderr, "%s count %d stride %d\n",
+                       __FUNCTION__, count, stride);
+
+       if (stride == 4) {
+               /* Source is already tightly packed: one bulk copy. */
+               COPY_DWORDS(out, data, count);
+               return;
+       }
+
+       /* Strided source: gather one dword per element. */
+       for (left = count; left > 0; left--, data += stride)
+               *out++ = *(int *)data;
+}
+
+/* Pack count two-dword elements, read stride bytes apart from data,
+ * into the DMA region at rvb->address + rvb->start.  ctx is unused.
+ */
+static void emit_vec8(GLcontext * ctx,
+                     struct r300_dma_region *rvb,
+                     char *data, int stride, int count)
+{
+       int *out = (int *)(rvb->address + rvb->start);
+       int n;
+
+       if (RADEON_DEBUG & DEBUG_VERTS)
+               fprintf(stderr, "%s count %d stride %d\n",
+                       __FUNCTION__, count, stride);
+
+       if (stride == 8) {
+               /* Source is already tightly packed: one bulk copy. */
+               COPY_DWORDS(out, data, count * 2);
+               return;
+       }
+
+       /* Strided source: gather two dwords per element. */
+       for (n = 0; n < count; n++, out += 2, data += stride) {
+               const int *in = (const int *)data;
+
+               out[0] = in[0];
+               out[1] = in[1];
+       }
+}
+
+/* Pack count three-dword elements, read stride bytes apart from data,
+ * into the DMA region at rvb->address + rvb->start.  ctx is unused.
+ */
+static void emit_vec12(GLcontext * ctx,
+                      struct r300_dma_region *rvb,
+                      char *data, int stride, int count)
+{
+       int *out = (int *)(rvb->address + rvb->start);
+       int n;
+
+       if (RADEON_DEBUG & DEBUG_VERTS)
+               fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+                       __FUNCTION__, count, stride, (void *)out, (void *)data);
+
+       if (stride == 12) {
+               /* Source is already tightly packed: one bulk copy. */
+               COPY_DWORDS(out, data, count * 3);
+               return;
+       }
+
+       /* Strided source: gather three dwords per element. */
+       for (n = 0; n < count; n++, out += 3, data += stride) {
+               const int *in = (const int *)data;
+
+               out[0] = in[0];
+               out[1] = in[1];
+               out[2] = in[2];
+       }
+}
+
+/* Pack count four-dword elements, read stride bytes apart from data,
+ * into the DMA region at rvb->address + rvb->start.  ctx is unused.
+ */
+static void emit_vec16(GLcontext * ctx,
+                      struct r300_dma_region *rvb,
+                      char *data, int stride, int count)
+{
+       int *out = (int *)(rvb->address + rvb->start);
+       int n;
+
+       if (RADEON_DEBUG & DEBUG_VERTS)
+               fprintf(stderr, "%s count %d stride %d\n",
+                       __FUNCTION__, count, stride);
+
+       if (stride == 16) {
+               /* Source is already tightly packed: one bulk copy. */
+               COPY_DWORDS(out, data, count * 4);
+               return;
+       }
+
+       /* Strided source: gather four dwords per element. */
+       for (n = 0; n < count; n++, out += 4, data += stride) {
+               const int *in = (const int *)data;
+
+               out[0] = in[0];
+               out[1] = in[1];
+               out[2] = in[2];
+               out[3] = in[3];
+       }
+}
+
+/* Allocate a DMA region for one vertex attribute and copy it there.
+ *
+ * rvb:    region to fill; must not already own a buffer
+ * data:   first source element
+ * size:   dwords per element (1-4)
+ * stride: bytes between source elements; 0 means a single constant
+ *         element, which is uploaded once with aos_stride 0
+ * count:  number of elements (ignored when stride is 0)
+ *
+ * Records aos_offset, aos_stride and aos_size in rvb.  Any size outside
+ * 1-4 asserts and exits.
+ */
+static void emit_vector(GLcontext * ctx,
+                       struct r300_dma_region *rvb,
+                       char *data, int size, int stride, int count)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       int hw_stride = size;   /* packed output: stride equals size */
+
+       if (RADEON_DEBUG & DEBUG_VERTS)
+               fprintf(stderr, "%s count %d size %d stride %d\n",
+                       __FUNCTION__, count, size, stride);
+
+       assert(!rvb->buf);
+
+       if (stride == 0) {
+               /* Constant attribute: only one element is uploaded. */
+               count = 1;
+               hw_stride = 0;
+       }
+
+       r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);    /* alignment? */
+       rvb->aos_offset = GET_START(rvb);
+       rvb->aos_stride = hw_stride;
+       rvb->aos_size   = size;
+
+       /* Copy the data with the routine matching the element width. */
+       if (size == 1) {
+               emit_vec4(ctx, rvb, data, stride, count);
+       } else if (size == 2) {
+               emit_vec8(ctx, rvb, data, stride, count);
+       } else if (size == 3) {
+               emit_vec12(ctx, rvb, data, stride, count);
+       } else if (size == 4) {
+               emit_vec16(ctx, rvb, data, stride, count);
+       } else {
+               assert(0);
+               exit(1);
+       }
+
+}
+
+/* Emit any changed arrays to new GART memory, re-emit a packet to
+ * update the arrays.
+ *
+ * For every attribute selected in inputs (position, normal, colour 0,
+ * colour 1, texture coordinates) the TNL vertex buffer data is copied
+ * into rmesa->state.aos[] with emit_vector(), and each array is given the
+ * next VAP register number and a component format.  The function then
+ * fills in the vir[0], vir[1], vic and vof state atoms from those
+ * arrays and stores the number of arrays in rmesa->state.aos_count.
+ *
+ * NOTE(review): nr is never checked against R300_MAX_AOS_ARRAYS before
+ * state.aos[nr++] is written.
+ */
+void r300EmitArrays(GLcontext * ctx, GLuint inputs)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       r300ContextPtr r300 = rmesa;    /* second name for the same context */
+       struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+       GLuint nr = 0;          /* number of arrays emitted so far */
+       GLuint count = VB->Count;
+       GLuint dw,mask;
+       GLuint vic_1 = 0;       /* R300_VAP_INPUT_CNTL_1 */
+       GLuint aa_vap_reg = 0; /* VAP register assignment */
+       GLuint i;
+
+/*FIXME: handle vertex program input */
+       if (inputs & _TNL_BIT_POS) {
+               if (RADEON_DEBUG & DEBUG_ALL)
+                       fprintf(stderr, "[%d] _TNL_BIT_POS: sz=%d, st=%d, c=%d\n",
+                               nr, VB->ObjPtr->size, VB->ObjPtr->stride, count);
+               emit_vector(ctx, &rmesa->state.aos[nr++],
+                                       (char *)VB->ObjPtr->data,
+                                       VB->ObjPtr->size,
+                                       VB->ObjPtr->stride, count);
+
+               vic_1 |= R300_INPUT_CNTL_POS;
+               rmesa->state.aos[nr-1].aos_reg    = aa_vap_reg++;
+               rmesa->state.aos[nr-1].aos_format = AOS_FORMAT_FLOAT;
+       }
+
+       if (inputs & _TNL_BIT_NORMAL) {
+               if (RADEON_DEBUG & DEBUG_ALL)
+                       fprintf(stderr, "[%d] _TNL_BIT_NORMAL: sz=%d, st=%d, c=%d\n",
+                               nr, VB->NormalPtr->size, VB->NormalPtr->stride, count);
+               emit_vector(ctx, &rmesa->state.aos[nr++],
+                               (char *)VB->NormalPtr->data,
+                               VB->NormalPtr->size,
+                               VB->NormalPtr->stride, count);
+
+               vic_1 |= R300_INPUT_CNTL_NORMAL;
+               rmesa->state.aos[nr-1].aos_reg    = aa_vap_reg++;
+               rmesa->state.aos[nr-1].aos_format = AOS_FORMAT_FLOAT;
+       }
+
+       /* NOTE(review): this test and the COLOR1 one below use VERT_BIT_*
+        * while the other inputs and the debug labels use _TNL_BIT_* --
+        * confirm the two sets of bits are interchangeable here. */
+       if (inputs & VERT_BIT_COLOR0) {
+               int emitsize;
+               if (RADEON_DEBUG & DEBUG_ALL)
+                       fprintf(stderr, "[%d] _TNL_BIT_COLOR0: sz=%d, st=%d, c=%d\n",
+                               nr, VB->ColorPtr[0]->size, VB->ColorPtr[0]->stride, count);
+
+               /* Upload alpha only for a four-component colour that either
+                * varies per vertex or is a constant with alpha != 1.0;
+                * otherwise three components are emitted. */
+               if (VB->ColorPtr[0]->size == 4 &&
+                   (VB->ColorPtr[0]->stride != 0 ||
+                    VB->ColorPtr[0]->data[0][3] != 1.0)) {
+                       emitsize = 4;
+               } else {
+                       emitsize = 3;
+               }
+
+               emit_vector(ctx,
+                           &(rmesa->state.aos[nr++]),
+                           (char *)VB->ColorPtr[0]->data,
+                           emitsize, VB->ColorPtr[0]->stride, count);
+//                         emitsize, VB->ColorPtr[0]->stride, count);
+
+               vic_1 |= R300_INPUT_CNTL_COLOR;
+               rmesa->state.aos[nr-1].aos_reg    = aa_vap_reg++;
+               rmesa->state.aos[nr-1].aos_format = AOS_FORMAT_FLOAT_COLOR;
+       }
+
+       /* Secondary colour: uploaded like the others, but no bit is added
+        * to vic_1 for it. */
+       if (inputs & VERT_BIT_COLOR1) {
+               if (RADEON_DEBUG & DEBUG_ALL)
+                       fprintf(stderr, "[%d] _TNL_BIT_COLOR1: sz=%d, st=%d, c=%d\n",
+                               nr, VB->SecondaryColorPtr[0]->size, VB->SecondaryColorPtr[0]->stride, count);
+
+               emit_vector(ctx,
+                           &rmesa->state.aos[nr++],
+                           (char *)VB->SecondaryColorPtr[0]->data,
+                           VB->SecondaryColorPtr[0]->size, VB->SecondaryColorPtr[0]->stride, count);
+               rmesa->state.aos[nr-1].aos_reg    = aa_vap_reg++;
+               rmesa->state.aos[nr-1].aos_format = AOS_FORMAT_FLOAT_COLOR;
+       }
+
+       for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+               if (inputs & (_TNL_BIT_TEX0 << i)) {
+                       if (RADEON_DEBUG & DEBUG_ALL)
+                               fprintf(stderr, "[%d] _TNL_BIT_TEX%02d: sz=%d, st=%d, c=%d\n",
+                                       nr, i, VB->TexCoordPtr[i]->size, VB->TexCoordPtr[i]->stride, count);
+                       emit_vector(ctx,
+                                   &(rmesa->state.aos[nr++]),
+                                   (char *)VB->TexCoordPtr[i]->data,
+                                   VB->TexCoordPtr[i]->size,
+                                   VB->TexCoordPtr[i]->stride, count);
+
+                       vic_1 |= R300_INPUT_CNTL_TC0 << i;
+                       rmesa->state.aos[nr-1].aos_reg    = aa_vap_reg++;
+                       rmesa->state.aos[nr-1].aos_format = AOS_FORMAT_FLOAT;
+               }
+       }
+
+/* NOTE(review): these three locals are not referenced by name below;
+ * presumably they exist for command-buffer macros -- confirm before
+ * removing. */
+int cmd_reserved=0;
+int cmd_written=0;
+drm_radeon_cmd_header_t *cmd = NULL;
+
+/* Debug dump of one aos[] entry.  NOTE(review): the definition ends in
+ * "while(0);" so the call sites below are written without a semicolon. */
+#define SHOW_INFO(n) do { \
+       if (RADEON_DEBUG & DEBUG_ALL) { \
+       fprintf(stderr, "RR[%d] - sz=%d, reg=%d, fmt=%d -- st=%d, of=0x%08x\n", \
+               n, \
+               r300->state.aos[n].aos_size, \
+               r300->state.aos[n].aos_reg, \
+               r300->state.aos[n].aos_format, \
+               r300->state.aos[n].aos_stride, \
+               r300->state.aos[n].aos_offset); \
+       } \
+} while(0);
+
+       /* setup INPUT_ROUTE */
+       /* Two arrays share each dword: array i in the low 16 bits, array
+        * i+1 in the high 16.  Per array: (size-1) in the low bits, the
+        * VAP register at bit 8 and the format at bit 14.  Bit 13 of the
+        * final array's half is set -- presumably an end-of-list marker. */
+       R300_STATECHANGE(r300, vir[0]);
+       for(i=0;i+1<nr;i+=2){
+               SHOW_INFO(i)
+               SHOW_INFO(i+1)
+               dw=(r300->state.aos[i].aos_size-1)
+               | ((r300->state.aos[i].aos_reg)<<8)
+               | (r300->state.aos[i].aos_format<<14)
+               | (((r300->state.aos[i+1].aos_size-1)
+               | ((r300->state.aos[i+1].aos_reg)<<8)
+               | (r300->state.aos[i+1].aos_format<<14))<<16);
+
+               if(i+2==nr){
+                       dw|=(1<<(13+16));
+                       }
+               r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
+               }
+       if(nr & 1){
+               SHOW_INFO(nr-1)
+               dw=(r300->state.aos[nr-1].aos_size-1)
+               | (r300->state.aos[nr-1].aos_format<<14)
+               | ((r300->state.aos[nr-1].aos_reg)<<8)
+               | (1<<13);
+               r300->hw.vir[0].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
+               //fprintf(stderr, "vir0 dw=%08x\n", dw);
+               }
+       /* Set the rest of INPUT_ROUTE_0 to 0 */
+       //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[0].cmd[R300_VIR_CNTL_0+i]=(0x0);
+       /* Rewrite the count in the atom's header to the number of dwords
+        * filled in above. */
+       ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->unchecked_state.count = (nr+1)>>1;
+
+
+       /* Mesa assumes that all missing components are from (0, 0, 0, 1) */
+       #define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
+
+       #define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
+               | (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
+
+       R300_STATECHANGE(r300, vir[1]);
+
+       /* mask keeps the low 3*aos_size bits: those come from
+        * ALL_COMPONENTS, the remaining bits from ALL_DEFAULT.  This
+        * presumably relies on each select field being 3 bits wide --
+        * confirm against the R300_INPUT_ROUTE_*_SHIFT definitions. */
+       for(i=0;i+1<nr;i+=2){
+               /* do i first.. */
+               mask=(1<<(r300->state.aos[i].aos_size*3))-1;
+               dw=(ALL_COMPONENTS & mask)
+               | (ALL_DEFAULT & ~mask)
+               | R300_INPUT_ROUTE_ENABLE;
+
+               /* i+1 */
+               mask=(1<<(r300->state.aos[i+1].aos_size*3))-1;
+               dw|=(
+               (ALL_COMPONENTS & mask)
+               | (ALL_DEFAULT & ~mask)
+               | R300_INPUT_ROUTE_ENABLE
+               )<<16;
+
+               r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
+               }
+       if(nr & 1){
+               mask=(1<<(r300->state.aos[nr-1].aos_size*3))-1;
+               dw=(ALL_COMPONENTS & mask)
+               | (ALL_DEFAULT & ~mask)
+               | R300_INPUT_ROUTE_ENABLE;
+               r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
+               //fprintf(stderr, "vir1 dw=%08x\n", dw);
+               }
+       /* Set the rest of INPUT_ROUTE_1 to 0 */
+       //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0;
+       ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->unchecked_state.count = (nr+1)>>1;
+
+       /* Set up input_cntl */
+
+       R300_STATECHANGE(r300, vic);
+       r300->hw.vic.cmd[R300_VIC_CNTL_0]=0x5555;  /* Hard coded value, no idea what it means */
+       r300->hw.vic.cmd[R300_VIC_CNTL_1]=vic_1;
+
+#if 0
+       r300->hw.vic.cmd[R300_VIC_CNTL_1]=0;
+       
+       if(r300->state.render_inputs & _TNL_BIT_POS)
+               r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_POS;
+       
+       if(r300->state.render_inputs & _TNL_BIT_NORMAL)
+               r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_NORMAL;
+       
+       if(r300->state.render_inputs & _TNL_BIT_COLOR0)
+               r300->hw.vic.cmd[R300_VIC_CNTL_1]|=R300_INPUT_CNTL_COLOR;
+
+       for(i=0;i < ctx->Const.MaxTextureUnits;i++)
+               if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i))
+                       r300->hw.vic.cmd[R300_VIC_CNTL_1]|=(R300_INPUT_CNTL_TC0<<i);
+#endif
+
+       /* Stage 3: VAP output */
+       /* Position and colour are always flagged present.  The texture
+        * bits are taken from r300->state.render_inputs, not from the
+        * inputs argument. */
+       R300_STATECHANGE(r300, vof);
+       r300->hw.vof.cmd[R300_VOF_CNTL_0]=R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT
+                                       | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
+
+       r300->hw.vof.cmd[R300_VOF_CNTL_1]=0;
+       for(i=0;i < ctx->Const.MaxTextureUnits;i++)
+               if(r300->state.render_inputs & (_TNL_BIT_TEX0<<i))
+                       r300->hw.vof.cmd[R300_VOF_CNTL_1]|=(4<<(3*i));
+
+       /* Remember how many regions r300ReleaseArrays() has to release. */
+       rmesa->state.aos_count = nr;
+}
+
+/* Release the DMA region of every array currently recorded in
+ * rmesa->state.aos[], i.e. the first state.aos_count entries.
+ */
+void r300ReleaseArrays(GLcontext * ctx)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       int idx = 0;
+
+       /* aos_count is re-read on every pass, as in a plain counted loop. */
+       while (idx < rmesa->state.aos_count) {
+               r300ReleaseDmaRegion(rmesa, rmesa->state.aos + idx, __FUNCTION__);
+               idx++;
+       }
+}
diff --git a/src/mesa/drivers/dri/r300/r300_maos.h b/src/mesa/drivers/dri/r300/r300_maos.h
new file mode 100644 (file)
index 0000000..6f6cce2
--- /dev/null
@@ -0,0 +1,48 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R300_MAOS_H__
+#define __R300_MAOS_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "r300_context.h"
+
+/* Upload the vertex arrays selected by inputs into DMA regions and set
+ * up the matching input-routing state; records the array count in the
+ * context. */
+extern void r300EmitArrays(GLcontext * ctx, GLuint inputs);
+/* Release the DMA regions of the arrays recorded in the context. */
+extern void r300ReleaseArrays(GLcontext * ctx);
+
+#endif
+#endif
+
index 56c8a5af4df8bd2a7a25b79b5c511baac00fc529..1d2157449673bab9bde21e5f0ee13010c8eed9fe 100644 (file)
@@ -485,8 +485,8 @@ static GLboolean r300_run_immediate_render(GLcontext *ctx,
    efloat(1.0);
    #endif
    
-   /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */
-   r300EmitLOAD_VBPNTR(rmesa, 0);
+/* Why do we need this for immediate mode?? */
+//   r300EmitLOAD_VBPNTR(rmesa, 0);
    
    for(i=0; i < VB->PrimitiveCount; i++){
        GLuint prim = VB->Primitive[i].mode;
@@ -508,91 +508,8 @@ static GLboolean r300_run_immediate_render(GLcontext *ctx,
    return GL_FALSE;
 }
 
-
 /* vertex buffer implementation */
 
-/* We use the start part of GART texture buffer for vertices */
-
-
-static void upload_vertex_buffer(r300ContextPtr rmesa, GLcontext *ctx)
-{
-       TNLcontext *tnl = TNL_CONTEXT(ctx);
-       struct vertex_buffer *VB = &tnl->vb;
-       int idx=0;
-       int i,j,k;
-       radeonScreenPtr rsp=rmesa->radeon.radeonScreen;
-       GLuint render_inputs;
-       
-       /* A hack - we don't want to overwrite vertex buffers, so we
-       just use AGP space for them.. Fix me ! */
-       static int offset=0;
-       if(offset>2*1024*1024){
-               //fprintf(stderr, "Wrapping agp vertex buffer offset\n");
-               offset=0;
-               }
-       /* Not the most efficient implementation, but, for now, I just want something that
-       works */
-       /* to do - make single memcpy per column (is it possible ?) */
-       /* to do - use dirty flags to avoid redundant copies */
-       #define UPLOAD_VECTOR(v)\
-               { \
-               /* Is the data dirty ? */ \
-               if (v->flags & ((1<<v->size)-1)) { \
-                       /* fprintf(stderr, "size=%d vs stride=%d\n", v->size, v->stride); */ \
-                       if(v->size*4==v->stride){\
-                               /* fast path */  \
-                               memcpy(rsp->gartTextures.map+offset, v->data, v->stride*VB->Count); \
-                               } else { \
-                               for(i=0;i<VB->Count;i++){ \
-                                       /* copy one vertex at a time*/ \
-                                       memcpy(rsp->gartTextures.map+offset+i*v->size*4, VEC_ELT(v, GLfloat, i), v->size*4); \
-                                       } \
-                               } \
-                       /* v->flags &= ~((1<<v->size)-1);*/ \
-                       } \
-               rmesa->state.aos[idx].offset=rsp->gartTextures.handle+offset; \
-               offset+=v->size*4*VB->Count; \
-               idx++; \
-               }
-               
-   render_inputs = rmesa->state.render_inputs;
-
-   if(!render_inputs){
-       WARN_ONCE("Aeiee ! render_inputs==0. Skipping rendering.\n");
-       return;
-       }
-       /* coordinates */
-       if(render_inputs & _TNL_BIT_POS)
-               UPLOAD_VECTOR(VB->ObjPtr);
-       if(render_inputs & _TNL_BIT_NORMAL)
-               UPLOAD_VECTOR(VB->NormalPtr);
-       
-       /* color components */
-       if(render_inputs & _TNL_BIT_COLOR0)
-               UPLOAD_VECTOR(VB->ColorPtr[0]);
-       if(render_inputs & _TNL_BIT_COLOR1)
-               UPLOAD_VECTOR(VB->SecondaryColorPtr[0]);
-       
-       if(render_inputs & _TNL_BIT_FOG)
-               UPLOAD_VECTOR(VB->FogCoordPtr);
-       
-       /* texture coordinates */
-       for(k=0;k < ctx->Const.MaxTextureUnits;k++)
-               if(render_inputs & (_TNL_BIT_TEX0<<k))
-                       UPLOAD_VECTOR(VB->TexCoordPtr[k]);
-       
-       if(render_inputs & _TNL_BIT_INDEX)
-               UPLOAD_VECTOR(VB->IndexPtr[0]);
-       if(render_inputs & _TNL_BIT_POINTSIZE)
-               UPLOAD_VECTOR(VB->PointSizePtr);
-
-       if(idx>=R300_MAX_AOS_ARRAYS){
-               fprintf(stderr, "Aieee ! Maximum AOS arrays count exceeded.. \n");
-               exit(-1);
-               }
-       //dump_inputs(ctx, render_inputs); return ;
-}
-
 static void r300_render_vb_primitive(r300ContextPtr rmesa, 
        GLcontext *ctx,
        int start,
@@ -624,50 +541,45 @@ static GLboolean r300_run_vb_render(GLcontext *ctx,
                fprintf(stderr, "%s\n", __FUNCTION__);
 
    
-   reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
+       r300ReleaseArrays(ctx);
+       r300EmitArrays(ctx, rmesa->state.render_inputs);
+
+//     LOCK_HARDWARE(&(rmesa->radeon));
+
+       reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
        e32(0x0000000a);
    
-   reg_start(0x4f18,0);
+       reg_start(0x4f18,0);
        e32(0x00000003);
-   
-   r300_setup_routing(ctx, GL_FALSE);
-       
-   r300EmitState(rmesa);
 
-   /* setup array of structures data */
-   LOCK_HARDWARE(&(rmesa->radeon));
+       r300EmitState(rmesa);           
 
-   upload_vertex_buffer(rmesa, ctx);
-   //fprintf(stderr, "Using %d AOS arrays\n", n_arrays);
-   
-   for(i=0; i < VB->PrimitiveCount; i++){
-       GLuint prim = VB->Primitive[i].mode;
-       GLuint start = VB->Primitive[i].start;
-       GLuint length = VB->Primitive[i].count;
-                       
-          /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. */
-        r300EmitLOAD_VBPNTR(rmesa, start);
-       
-       r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
-       }
-       
-    /* This sequence is required after any 3d drawing packet
-      I suspect it works around a bug (or deficiency) in hardware */
-  
-  reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
+       rmesa->state.Elts = VB->Elts;
+
+       for(i=0; i < VB->PrimitiveCount; i++){
+               GLuint prim = VB->Primitive[i].mode;
+               GLuint start = VB->Primitive[i].start;
+               GLuint length = VB->Primitive[i].count;
+
+               r300EmitAOS(rmesa, rmesa->state.aos_count, start);   
+
+               r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
+       }
+
+       reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
        e32(0x0000000a);
-   
-   reg_start(0x4f18,0);
+  
+       reg_start(0x4f18,0);
        e32(0x00000003);
-   
-   end_3d(PASS_PREFIX_VOID);
+
+//     end_3d(PASS_PREFIX_VOID);
    
    /* Flush state - we are done drawing.. */
-   r300FlushCmdBufLocked(rmesa, __FUNCTION__);
-   radeonWaitForIdleLocked(&(rmesa->radeon));
+//     r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+//     radeonWaitForIdleLocked(&(rmesa->radeon));
    
-   UNLOCK_HARDWARE(&(rmesa->radeon));
-   return GL_FALSE;
+//     UNLOCK_HARDWARE(&(rmesa->radeon));
+       return GL_FALSE;
 }
 
 /**