r300: fix WPOS for SWTCL
[mesa.git] / src / mesa / drivers / dri / r300 / r300_swtcl.c
index 8aebd9be3ea0b9f434576702c7d2609c23b280f6..56ed519cf414106fcc7e6c0ed7d0723d14cc4320 100644 (file)
@@ -28,303 +28,231 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 /*
  * Authors:
  *   Dave Airlie <airlied@linux.ie>
+ *   Maciej Cencora <m.cencora@gmail.com>
  */
 
-/* derived from r200 swtcl path */
-
-
-
-#include "glheader.h"
-#include "mtypes.h"
-#include "colormac.h"
-#include "enums.h"
-#include "image.h"
-#include "imports.h"
-#include "light.h"
-#include "macros.h"
-
-#include "swrast/s_context.h"
-#include "swrast/s_fog.h"
-#include "swrast_setup/swrast_setup.h"
-#include "math/m_translate.h"
 #include "tnl/tnl.h"
-#include "tnl/t_context.h"
 #include "tnl/t_pipeline.h"
 
-#include "r300_context.h"
-#include "r300_swtcl.h"
 #include "r300_state.h"
-#include "r300_ioctl.h"
+#include "r300_swtcl.h"
 #include "r300_emit.h"
-#include "r300_mem.h"
+#include "r300_tex.h"
+#include "r300_render.h"
 
-static void flush_last_swtcl_prim( r300ContextPtr rmesa  );
-
-
-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
-void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
 #define EMIT_ATTR( ATTR, STYLE )                                       \
 do {                                                                   \
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);  \
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
-   rmesa->swtcl.vertex_attr_count++;                                   \
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);    \
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);   \
+   rmesa->radeon.swtcl.vertex_attr_count++;                                    \
 } while (0)
 
 #define EMIT_PAD( N )                                                  \
 do {                                                                   \
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;               \
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;        \
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);             \
-   rmesa->swtcl.vertex_attr_count++;                                   \
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;         \
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;  \
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);               \
+   rmesa->radeon.swtcl.vertex_attr_count++;                                    \
 } while (0)
 
-static void r300SetVertexFormat( GLcontext *ctx )
+#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \
+do { \
+       attrs[num_attrs].element = (_attr); \
+       attrs[num_attrs].data_type = (_format); \
+       attrs[num_attrs].dst_loc = (_dst_loc); \
+       attrs[num_attrs].swizzle = (_swizzle); \
+       attrs[num_attrs].write_mask = (_write_mask); \
+       attrs[num_attrs]._signed = 0; \
+       attrs[num_attrs].normalize = (_normalize); \
+       ++num_attrs; \
+} while (0)
+
+void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead,  GLuint *_OutputsWritten)
 {
        r300ContextPtr rmesa = R300_CONTEXT( ctx );
        TNLcontext *tnl = TNL_CONTEXT(ctx);
        struct vertex_buffer *VB = &tnl->vb;
-       DECLARE_RENDERINPUTS(index_bitset);
-       GLuint InputsRead = 0, OutputsWritten = 0;
-       int vap_fmt_0 = 0;
-       int vap_vte_cntl = 0;
-       int offset = 0;
-       int vte = 0;
-       GLint inputs[VERT_ATTRIB_MAX];
-       GLint tab[VERT_ATTRIB_MAX];
-       int swizzle[VERT_ATTRIB_MAX][4];
-       GLuint i, nr;
-       GLuint sz, vap_fmt_1 = 0;
-
-       DECLARE_RENDERINPUTS(render_inputs_bitset);
-       RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
-       RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
-       RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
-
-       vte = rmesa->hw.vte.cmd[1];
-       vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT);
-       /* Important:
-        */
-       if ( VB->NdcPtr != NULL ) {
-               VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
-               vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT;
-       }
-       else {
-               VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
-               vte |= R300_VTX_W0_FMT;
+       int first_free_tex = 0;
+       GLuint InputsRead = 0;
+       GLuint OutputsWritten = 0;
+       int num_attrs = 0;
+       GLuint fp_reads = rmesa->selected_fp->Base->InputsRead;
+       struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+
+       rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
+       rmesa->radeon.swtcl.vertex_attr_count = 0;
+
+       /* We always want non Ndc coords format */
+       VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+
+       /* Always write position vector */
+       InputsRead |= 1 << VERT_ATTRIB_POS;
+       OutputsWritten |= 1 << VERT_RESULT_HPOS;
+       EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
+       ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0);
+       rmesa->swtcl.coloroffset = 4;
+
+       if (fp_reads & FRAG_BIT_COL0) {
+               InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+               OutputsWritten |= 1 << VERT_RESULT_COL0;
+#if MESA_LITTLE_ENDIAN
+               EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA );
+               ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+#else
+               EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR );
+               ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+#endif
        }
 
-       assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
-       rmesa->swtcl.vertex_attr_count = 0;
+       if (fp_reads & FRAG_BIT_COL1) {
+               GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+               InputsRead |= 1 << VERT_ATTRIB_COLOR1;
+               OutputsWritten |= 1 << VERT_RESULT_COL1;
+#if MESA_LITTLE_ENDIAN
+               EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA );
+               ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+#else
+               EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR );
+               ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+#endif
+               rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1;
+       }
 
-       /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
-        * build up a hardware vertex.
-        */
-       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) {
-               sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
-               InputsRead |= 1 << VERT_ATTRIB_POS;
-               OutputsWritten |= 1 << VERT_RESULT_HPOS;
-               EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
-               offset = sz;
-       } else {
-               offset = 4;
-               EMIT_PAD(4 * sizeof(float));
+       if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+               VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1];
+               OutputsWritten |= 1 << VERT_RESULT_BFC0;
+#if MESA_LITTLE_ENDIAN
+               EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA );
+               ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+#else
+               EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR );
+               ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+#endif
+               if (fp_reads & FRAG_BIT_COL1) {
+                       VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->SecondaryColorPtr[1];
+                       GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+                       OutputsWritten |= 1 << VERT_RESULT_BFC1;
+#if MESA_LITTLE_ENDIAN
+                       EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA );
+                       ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+#else
+                       EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR );
+                       ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+#endif
+               }
        }
 
-       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
+       if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) {
+               GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
+               InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE;
+               OutputsWritten |= 1 << VERT_RESULT_PSIZ;
                EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
-               vap_fmt_0 |=  R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-               offset += 1;
+               ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
        }
 
-       if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) {
-               sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size;
-               rmesa->swtcl.coloroffset = offset;
-               InputsRead |= 1 << VERT_ATTRIB_COLOR0;
-               OutputsWritten |= 1 << VERT_RESULT_COL0;
-               EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 );
-               offset += sz;
+       if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+               int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+
+               VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+               VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+               RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
        }
 
-       rmesa->swtcl.specoffset = 0;
-       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
-               sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size;
-               rmesa->swtcl.specoffset = offset;
-               EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 );
-               InputsRead |= 1 << VERT_ATTRIB_COLOR1;
-               OutputsWritten |= 1 << VERT_RESULT_COL1;
+       if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+               int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
+
+               VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+               VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+               RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
        }
 
-       if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+       /**
+        *  Sending only one texcoord component may lead to lock up,
+        *  so for all textures always output 4 texcoord components to RS.
+        */
+       {
                int i;
-
+               GLuint swiz, format, hw_format;
                for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-                       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
-                               sz = VB->TexCoordPtr[i]->size;
+                       if (fp_reads & FRAG_BIT_TEX(i)) {
+                               switch (VB->TexCoordPtr[i]->size) {
+                                       case 1:
+                                               format = EMIT_1F;
+                                               hw_format = R300_DATA_TYPE_FLOAT_1;
+                                               swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+                                               break;
+                                       case 2:
+                                               format = EMIT_2F;
+                                               hw_format = R300_DATA_TYPE_FLOAT_2;
+                                               swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
+                                               break;
+                                       case 3:
+                                               format = EMIT_3F;
+                                               hw_format = R300_DATA_TYPE_FLOAT_3;
+                                               swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+                                               break;
+                                       case 4:
+                                               format = EMIT_4F;
+                                               hw_format = R300_DATA_TYPE_FLOAT_4;
+                                               swiz = SWIZZLE_XYZW;
+                                               break;
+                                       default:
+                                               continue;
+                               }
                                InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
                                OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
-                               EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 );
-                               vap_fmt_1 |= sz << (3 * i);
+                               EMIT_ATTR(_TNL_ATTRIB_TEX(i), format);
+                               ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
+                               ++first_free_tex;
                        }
                }
        }
 
-       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
-               if (InputsRead & (1 << i)) {
-                       inputs[i] = nr++;
-               } else {
-                       inputs[i] = -1;
-               }
-       }
-       
-       /* Fixed, apply to vir0 only */
-       if (InputsRead & (1 << VERT_ATTRIB_POS))
-               inputs[VERT_ATTRIB_POS] = 0;
-       if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
-               inputs[VERT_ATTRIB_COLOR0] = 2;
-       if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
-               inputs[VERT_ATTRIB_COLOR1] = 3;
-       for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
-               if (InputsRead & (1 << i))
-                       inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
-       
-       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
-               if (InputsRead & (1 << i)) {
-                       tab[nr++] = i;
-               }
-       }
-       
-       for (i = 0; i < nr; i++) {
-               int ci;
-               
-               swizzle[i][0] = SWIZZLE_ZERO;
-               swizzle[i][1] = SWIZZLE_ZERO;
-               swizzle[i][2] = SWIZZLE_ZERO;
-               swizzle[i][3] = SWIZZLE_ONE;
-
-               for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) {
-                       swizzle[i][ci] = ci;
-               }
+       if (first_free_tex >= ctx->Const.MaxTextureUnits) {
+               fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
+               _mesa_exit(-1);
        }
 
        R300_NEWPRIM(rmesa);
-       R300_STATECHANGE(rmesa, vir[0]);
-       ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
-               r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-                                  VB->AttribPtr, inputs, tab, nr);
-       R300_STATECHANGE(rmesa, vir[1]);
-       ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
-               r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
-                                  nr);
-   
-       R300_STATECHANGE(rmesa, vic);
-       rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
-       rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
-   
-       R300_STATECHANGE(rmesa, vof);
-       rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
-       rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
-   
-       rmesa->swtcl.vertex_size =
-               _tnl_install_attrs( ctx,
-                                   rmesa->swtcl.vertex_attrs, 
-                                   rmesa->swtcl.vertex_attr_count,
-                                   NULL, 0 );
-       
-       rmesa->swtcl.vertex_size /= 4;
+       rmesa->vbuf.num_attribs = num_attrs;
+       *_InputsRead = InputsRead;
+       *_OutputsWritten = OutputsWritten;
 
-       RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-
-
-       R300_STATECHANGE(rmesa, vte);
-       rmesa->hw.vte.cmd[1] = vte;
-       rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
-}
-
-
-/* Flush vertices in the current dma region.
- */
-static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
-{
-       if (RADEON_DEBUG & DEBUG_IOCTL)
-               fprintf(stderr, "%s\n", __FUNCTION__);
-       
-       rmesa->dma.flush = NULL;
-
-       if (rmesa->dma.current.buf) {
-               struct r300_dma_region *current = &rmesa->dma.current;
-               GLuint current_offset = GET_START(current);
-
-               assert (current->start + 
-                       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-                       current->ptr);
-
-               if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
-
-                       r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
-                       
-                       r300EmitState(rmesa);
-                       
-                       r300EmitVertexAOS( rmesa,
-                                          rmesa->swtcl.vertex_size,
-                                          current_offset);
-                       
-                       r300EmitVbufPrim( rmesa,
-                                         rmesa->swtcl.hw_primitive,
-                                         rmesa->swtcl.numverts);
-                       
-                       r300EmitCacheFlush(rmesa);
-               }
-               
-               rmesa->swtcl.numverts = 0;
-               current->start = current->ptr;
-       }
+       RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset);
 }
 
-/* Alloc space in the current dma region.
- */
-static void *
-r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
+static void r300PrepareVertices(GLcontext *ctx)
 {
-       GLuint bytes = vsize * nverts;
-
-       if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
-               r300RefillCurrentDmaRegion( rmesa, bytes);
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       GLuint InputsRead, OutputsWritten;
 
-       if (!rmesa->dma.flush) {
-               rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-               rmesa->dma.flush = flush_last_swtcl_prim;
-       }
+       r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten);
+       r300SetupVAP(ctx, InputsRead, OutputsWritten);
 
-       ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
-       ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
-       ASSERT( rmesa->dma.current.start + 
-               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-               rmesa->dma.current.ptr );
+       rmesa->radeon.swtcl.vertex_size =
+               _tnl_install_attrs( ctx,
+                                   rmesa->radeon.swtcl.vertex_attrs,
+                                   rmesa->radeon.swtcl.vertex_attr_count,
+                                   NULL, 0 );
 
-       {
-               GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
-               rmesa->dma.current.ptr += bytes;
-               rmesa->swtcl.numverts += nverts;
-               return head;
-       }
+       rmesa->radeon.swtcl.vertex_size /= 4;
 }
 
+
 static GLuint reduced_prim[] = {
-  GL_POINTS,
-  GL_LINES,
-  GL_LINES,
-  GL_LINES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
+       GL_POINTS,
+       GL_LINES,
+       GL_LINES,
+       GL_LINES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
 };
 
 static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
-static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
-//static void r300ResetLineStipple( GLcontext *ctx );
 
 /***********************************************************************
  *                    Emit primitives as inline vertices               *
@@ -346,15 +274,13 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
 #undef LOCAL_VARS
 #undef ALLOC_VERTS
 #define CTX_ARG r300ContextPtr rmesa
-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 )
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
 #define LOCAL_VARS                                             \
    r300ContextPtr rmesa = R300_CONTEXT(ctx);           \
-   const char *r300verts = (char *)rmesa->swtcl.verts;
+   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
 #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
-#define VERTEX r300Vertex 
-#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
-#define PRINT_VERTEX(x)
+#define VERTEX r300Vertex
 #undef TAG
 #define TAG(x) r300_##x
 #include "tnl_dd/t_dd_triemit.h"
@@ -374,9 +300,8 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
  *              Build render functions from dd templates               *
  ***********************************************************************/
 
-#define R300_TWOSIDE_BIT       0x01
-#define R300_UNFILLED_BIT      0x02
-#define R300_MAX_TRIFUNC       0x04
+#define R300_UNFILLED_BIT      0x01
+#define R300_MAX_TRIFUNC       0x02
 
 static struct {
    tnl_points_func             points;
@@ -387,9 +312,9 @@ static struct {
 
 #define DO_FALLBACK  0
 #define DO_UNFILLED (IND & R300_UNFILLED_BIT)
-#define DO_TWOSIDE  (IND & R300_TWOSIDE_BIT)
+#define DO_TWOSIDE   0
 #define DO_FLAT      0
-#define DO_OFFSET     0
+#define DO_OFFSET    0
 #define DO_TRI       1
 #define DO_QUAD      1
 #define DO_LINE      1
@@ -409,33 +334,39 @@ static struct {
 #define VERT_Y(_v) _v->v.y
 #define VERT_Z(_v) _v->v.z
 #define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
-
-/* Only used to pull back colors into vertices (ie, we know color is
- * floating point).
- */
-#define R300_COLOR( dst, src )                         \
-do {                                                   \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]);       \
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+
+#define VERT_SET_RGBA( v, c ) \
+do { \
+   r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
 } while (0)
 
-#define VERT_SET_RGBA( v, c )    if (coloroffset) R300_COLOR( v->ub4[coloroffset], c )
-#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset]
-#define VERT_SAVE_RGBA( idx )    if (coloroffset) color[idx] = v[idx]->ui[coloroffset]
-#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+
+#define VERT_SET_SPEC( v0, c ) \
+do { \
+   if (specoffset) { \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \
+   } \
+} while (0)
 
-#define R300_SPEC( dst, src )                          \
-do {                                                   \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);       \
+#define VERT_COPY_SPEC( v0, v1 ) \
+do { \
+   if (specoffset) { \
+       v0->v.specular.red = v1->v.specular.red; \
+       v0->v.specular.green = v1->v.specular.green; \
+       v0->v.specular.blue = v1->v.specular.blue; \
+   } \
 } while (0)
 
-#define VERT_SET_SPEC( v, c )    if (specoffset) R300_SPEC( v->ub4[specoffset], c )
-#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
 #define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
 #define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
 
@@ -445,7 +376,7 @@ do {                                                        \
 
 #define LOCAL_VARS(n)                                                  \
    r300ContextPtr rmesa = R300_CONTEXT(ctx);                   \
-   GLuint color[n], spec[n];                                           \
+   GLuint color[n] = { 0, }, spec[n] = { 0, };                         \
    GLuint coloroffset = rmesa->swtcl.coloroffset;      \
    GLuint specoffset = rmesa->swtcl.specoffset;                        \
    (void) color; (void) spec; (void) coloroffset; (void) specoffset;
@@ -455,7 +386,7 @@ do {                                                        \
  ***********************************************************************/
 
 #define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
 #undef TAG
 #define TAG(x) x
 #include "tnl_dd/t_dd_unfilled.h"
@@ -471,26 +402,15 @@ do {                                                      \
 #define TAG(x) x
 #include "tnl_dd/t_dd_tritmp.h"
 
-#define IND (R300_TWOSIDE_BIT)
-#define TAG(x) x##_twoside
-#include "tnl_dd/t_dd_tritmp.h"
-
 #define IND (R300_UNFILLED_BIT)
 #define TAG(x) x##_unfilled
 #include "tnl_dd/t_dd_tritmp.h"
 
-#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT)
-#define TAG(x) x##_twoside_unfilled
-#include "tnl_dd/t_dd_tritmp.h"
-
-
 
 static void init_rast_tab( void )
 {
    init();
-   init_twoside();
    init_unfilled();
-   init_twoside_unfilled();
 }
 
 /**********************************************************************/
@@ -512,8 +432,8 @@ static void init_rast_tab( void )
 #undef LOCAL_VARS
 #define LOCAL_VARS                                             \
    r300ContextPtr rmesa = R300_CONTEXT(ctx);           \
-   const GLuint vertsize = rmesa->swtcl.vertex_size;           \
-   const char *r300verts = (char *)rmesa->swtcl.verts;         \
+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;            \
+   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;          \
    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;       \
    const GLboolean stipple = ctx->Line.StippleFlag;            \
    (void) elt; (void) stipple;
@@ -542,10 +462,9 @@ static void r300ChooseRenderState( GLcontext *ctx )
        GLuint index = 0;
        GLuint flags = ctx->_TriangleCaps;
 
-       if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
        if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
 
-       if (index != rmesa->swtcl.RenderIndex) {
+       if (index != rmesa->radeon.swtcl.RenderIndex) {
                tnl->Driver.Render.Points = rast_tab[index].points;
                tnl->Driver.Render.Line = rast_tab[index].line;
                tnl->Driver.Render.ClippedLine = rast_tab[index].line;
@@ -562,62 +481,61 @@ static void r300ChooseRenderState( GLcontext *ctx )
                        tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
                }
 
-               rmesa->swtcl.RenderIndex = index;
+               rmesa->radeon.swtcl.RenderIndex = index;
        }
 }
 
 
-static void r300RenderStart(GLcontext *ctx)
+void r300RenderStart(GLcontext *ctx)
 {
-        r300ContextPtr rmesa = R300_CONTEXT( ctx );
-       //      fprintf(stderr, "%s\n", __FUNCTION__);
+       r300ContextPtr rmesa = R300_CONTEXT( ctx );
 
-       r300ChooseRenderState(ctx);     
-       r300SetVertexFormat(ctx);
+       r300ChooseRenderState(ctx);
 
        r300UpdateShaders(rmesa);
+
+       r300PrepareVertices(ctx);
+
+       r300ValidateBuffers(ctx);
+
        r300UpdateShaderStates(rmesa);
 
        r300EmitCacheFlush(rmesa);
-       
-       if (rmesa->dma.flush != 0 && 
-           rmesa->dma.flush != flush_last_swtcl_prim)
-               rmesa->dma.flush( rmesa );
 
+       /* investigate if we can put back flush optimisation if needed */
+       if (rmesa->radeon.dma.flush != NULL) {
+               rmesa->radeon.dma.flush(ctx);
+       }
 }
 
-static void r300RenderFinish(GLcontext *ctx)
+void r300RenderFinish(GLcontext *ctx)
 {
 }
 
 static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       
-       if (rmesa->swtcl.hw_primitive != hwprim) {
-               R300_NEWPRIM( rmesa );
-               rmesa->swtcl.hw_primitive = hwprim;
+
+       if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+               R300_NEWPRIM( rmesa );
+               rmesa->radeon.swtcl.hw_primitive = hwprim;
        }
 }
 
-static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
 {
 
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       rmesa->swtcl.render_primitive = prim;
+       rmesa->radeon.swtcl.render_primitive = prim;
 
        if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
-         return;
+               return;
 
        r300RasterPrimitive( ctx, reduced_prim[prim] );
-       //      fprintf(stderr, "%s\n", __FUNCTION__);
-       
 }
 
-static void r300ResetLineStipple(GLcontext *ctx)
+void r300ResetLineStipple(GLcontext *ctx)
 {
-
-
 }
 
 void r300InitSwtcl(GLcontext *ctx)
@@ -625,12 +543,12 @@ void r300InitSwtcl(GLcontext *ctx)
        TNLcontext *tnl = TNL_CONTEXT(ctx);
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        static int firsttime = 1;
-       
+
        if (firsttime) {
                init_rast_tab();
                firsttime = 0;
        }
-       
+
        tnl->Driver.Render.Start = r300RenderStart;
        tnl->Driver.Render.Finish = r300RenderFinish;
        tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
@@ -638,60 +556,73 @@ void r300InitSwtcl(GLcontext *ctx)
        tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
        tnl->Driver.Render.CopyPV = _tnl_copy_pv;
        tnl->Driver.Render.Interp = _tnl_interp;
-       
+
        /* FIXME: what are these numbers? */
-       _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
+       _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
                            48 * sizeof(GLfloat) );
-       
-       rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-       rmesa->swtcl.RenderIndex = ~0;
-       rmesa->swtcl.render_primitive = GL_TRIANGLES;
-       rmesa->swtcl.hw_primitive = 0;  
+
+       rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+       rmesa->radeon.swtcl.RenderIndex = ~0;
+       rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+       rmesa->radeon.swtcl.hw_primitive = 0;
 
        _tnl_invalidate_vertex_state( ctx, ~0 );
        _tnl_invalidate_vertices( ctx, ~0 );
-       RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
 
        _tnl_need_projected_coords( ctx, GL_FALSE );
-       r300ChooseRenderState(ctx);
-
-       _mesa_validate_all_lighting_tables( ctx ); 
-
-       tnl->Driver.NotifyMaterialChange = 
-         _mesa_validate_all_lighting_tables;
 }
 
 void r300DestroySwtcl(GLcontext *ctx)
 {
 }
 
-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
+static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
 {
-       int cmd_reserved = 0;
-       int cmd_written = 0;
+       BATCH_LOCALS(&rmesa->radeon);
 
-       drm_radeon_cmd_header_t *cmd = NULL;
        if (RADEON_DEBUG & DEBUG_VERTS)
-         fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
-                 __FUNCTION__, vertex_size, offset);
-
-       start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
-       e32(1);
-       e32(vertex_size | (vertex_size << 8));
-       e32(offset);
+               fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
+                       __FUNCTION__, vertex_size, offset);
+
+       BEGIN_BATCH(7);
+       OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
+       OUT_BATCH(1);
+       OUT_BATCH(vertex_size | (vertex_size << 8));
+       OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+       END_BATCH();
 }
 
-void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
 {
-
-       int cmd_reserved = 0;
-       int cmd_written = 0;
+       BATCH_LOCALS(&rmesa->radeon);
        int type, num_verts;
-       drm_radeon_cmd_header_t *cmd = NULL;
 
        type = r300PrimitiveType(rmesa, primitive);
        num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
-       
-       start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
-       e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
+
+       BEGIN_BATCH(3);
+       OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+       OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
+       END_BATCH();
+}
+
+void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+       rcommonEnsureCmdBufSpace(&rmesa->radeon,
+                          rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
+                          __FUNCTION__);
+       radeonEmitState(&rmesa->radeon);
+    r300_emit_scissor(ctx);
+       r300EmitVertexAOS(rmesa,
+                       rmesa->radeon.swtcl.vertex_size,
+                       rmesa->radeon.dma.current,
+                       current_offset);
+
+       r300EmitVbufPrim(rmesa,
+                  rmesa->radeon.swtcl.hw_primitive,
+                  rmesa->radeon.swtcl.numverts);
+       r300EmitCacheFlush(rmesa);
+       COMMIT_BATCH();
 }