r300: fix WPOS for SWTCL
[mesa.git] / src / mesa / drivers / dri / r300 / r300_swtcl.c
index 73e3c51b9a7e0b4c2e29e3267bb8392ad5f78993..56ed519cf414106fcc7e6c0ed7d0723d14cc4320 100644 (file)
@@ -28,37 +28,18 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 /*
  * Authors:
  *   Dave Airlie <airlied@linux.ie>
+ *   Maciej Cencora <m.cencora@gmail.com>
  */
 
-/* derived from r200 swtcl path */
-
-
-
-#include "main/glheader.h"
-#include "main/mtypes.h"
-#include "main/colormac.h"
-#include "main/enums.h"
-#include "main/image.h"
-#include "main/imports.h"
-#include "main/light.h"
-#include "main/macros.h"
-
-#include "swrast/s_context.h"
-#include "swrast/s_fog.h"
-#include "swrast_setup/swrast_setup.h"
-#include "math/m_translate.h"
 #include "tnl/tnl.h"
-#include "tnl/t_context.h"
 #include "tnl/t_pipeline.h"
 
-#include "r300_context.h"
-#include "r300_swtcl.h"
 #include "r300_state.h"
-#include "r300_ioctl.h"
+#include "r300_swtcl.h"
 #include "r300_emit.h"
+#include "r300_tex.h"
+#include "r300_render.h"
 
-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset);
-void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
 #define EMIT_ATTR( ATTR, STYLE )                                       \
 do {                                                                   \
    rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);    \
@@ -74,150 +55,179 @@ do {                                                                      \
    rmesa->radeon.swtcl.vertex_attr_count++;                                    \
 } while (0)
 
-static void r300SetVertexFormat( GLcontext *ctx )
+#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \
+do { \
+       attrs[num_attrs].element = (_attr); \
+       attrs[num_attrs].data_type = (_format); \
+       attrs[num_attrs].dst_loc = (_dst_loc); \
+       attrs[num_attrs].swizzle = (_swizzle); \
+       attrs[num_attrs].write_mask = (_write_mask); \
+       attrs[num_attrs]._signed = 0; \
+       attrs[num_attrs].normalize = (_normalize); \
+       ++num_attrs; \
+} while (0)
+
+void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead,  GLuint *_OutputsWritten)
 {
        r300ContextPtr rmesa = R300_CONTEXT( ctx );
        TNLcontext *tnl = TNL_CONTEXT(ctx);
        struct vertex_buffer *VB = &tnl->vb;
-       DECLARE_RENDERINPUTS(index_bitset);
-       GLuint InputsRead = 0, OutputsWritten = 0;
-       int vap_fmt_0 = 0;
-       int offset = 0;
-       int vte = 0;
-       GLint inputs[VERT_ATTRIB_MAX];
-       GLint tab[VERT_ATTRIB_MAX];
-       int swizzle[VERT_ATTRIB_MAX][4];
-       GLuint i, nr;
-       GLuint sz, vap_fmt_1 = 0;
-
-       DECLARE_RENDERINPUTS(render_inputs_bitset);
-       RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
-       RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
-       RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
-
-       vte = rmesa->hw.vte.cmd[1];
-       vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT);
-       /* Important:
-        */
-       if ( VB->NdcPtr != NULL ) {
-               VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
-               vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT;
-       }
-       else {
-               VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
-               vte |= R300_VTX_W0_FMT;
-       }
-
-       assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+       int first_free_tex = 0;
+       GLuint InputsRead = 0;
+       GLuint OutputsWritten = 0;
+       int num_attrs = 0;
+       GLuint fp_reads = rmesa->selected_fp->Base->InputsRead;
+       struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+
+       rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
        rmesa->radeon.swtcl.vertex_attr_count = 0;
 
-       /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
-        * build up a hardware vertex.
-        */
-       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) {
-               sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
-               InputsRead |= 1 << VERT_ATTRIB_POS;
-               OutputsWritten |= 1 << VERT_RESULT_HPOS;
-               EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
-               offset = sz;
-       } else {
-               offset = 4;
-               EMIT_PAD(4 * sizeof(float));
-       }
+       /* Always feed clip-space (non-NDC) coordinates as the vertex position */
+       VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
 
-       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
-               EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
-               vap_fmt_0 |=  R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
-               offset += 1;
-       }
+       /* Always write position vector */
+       InputsRead |= 1 << VERT_ATTRIB_POS;
+       OutputsWritten |= 1 << VERT_RESULT_HPOS;
+       EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
+       ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0);
+       rmesa->swtcl.coloroffset = 4;
 
-       if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) {
-               sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size;
-               rmesa->swtcl.coloroffset = offset;
+       if (fp_reads & FRAG_BIT_COL0) {
                InputsRead |= 1 << VERT_ATTRIB_COLOR0;
                OutputsWritten |= 1 << VERT_RESULT_COL0;
-               EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 );
-               offset += sz;
+#if MESA_LITTLE_ENDIAN
+               EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA );
+               ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+#else
+               EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR );
+               ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+#endif
        }
 
-       rmesa->swtcl.specoffset = 0;
-       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
-               sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size;
-               rmesa->swtcl.specoffset = offset;
-               EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 );
+       if (fp_reads & FRAG_BIT_COL1) {
+               GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
                InputsRead |= 1 << VERT_ATTRIB_COLOR1;
                OutputsWritten |= 1 << VERT_RESULT_COL1;
+#if MESA_LITTLE_ENDIAN
+               EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA );
+               ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+#else
+               EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR );
+               ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+#endif
+               rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1;
        }
 
-       if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
-               int i;
-
-               for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-                       if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
-                               sz = VB->TexCoordPtr[i]->size;
-                               InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
-                               OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
-                               EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 );
-                               vap_fmt_1 |= sz << (3 * i);
-                       }
+       if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+               VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1];
+               OutputsWritten |= 1 << VERT_RESULT_BFC0;
+#if MESA_LITTLE_ENDIAN
+               EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA );
+               ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+#else
+               EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR );
+               ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+#endif
+               if (fp_reads & FRAG_BIT_COL1) {
+                       VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->SecondaryColorPtr[1];
+                       GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+                       OutputsWritten |= 1 << VERT_RESULT_BFC1;
+#if MESA_LITTLE_ENDIAN
+                       EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA );
+                       ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+#else
+                       EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR );
+                       ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+#endif
                }
        }
 
-       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
-               if (InputsRead & (1 << i)) {
-                       inputs[i] = nr++;
-               } else {
-                       inputs[i] = -1;
-               }
+       if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) {
+               GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
+               InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE;
+               OutputsWritten |= 1 << VERT_RESULT_PSIZ;
+               EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
+               ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
        }
 
-       /* Fixed, apply to vir0 only */
-       if (InputsRead & (1 << VERT_ATTRIB_POS))
-               inputs[VERT_ATTRIB_POS] = 0;
-       if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
-               inputs[VERT_ATTRIB_COLOR0] = 2;
-       if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
-               inputs[VERT_ATTRIB_COLOR1] = 3;
-       for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
-               if (InputsRead & (1 << i))
-                       inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
-
-       for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
-               if (InputsRead & (1 << i)) {
-                       tab[nr++] = i;
-               }
+       if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+               int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+
+               VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+               VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+               RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
        }
 
-       for (i = 0; i < nr; i++) {
-               int ci;
+       if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+               int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
 
-               swizzle[i][0] = SWIZZLE_ZERO;
-               swizzle[i][1] = SWIZZLE_ZERO;
-               swizzle[i][2] = SWIZZLE_ZERO;
-               swizzle[i][3] = SWIZZLE_ONE;
+               VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+               VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+               RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+       }
 
-               for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) {
-                       swizzle[i][ci] = ci;
+       /**
+        *  Sending only one texcoord component may lead to lock up,
+        *  so for all textures always output 4 texcoord components to RS.
+        */
+       {
+               int i;
+               GLuint swiz, format, hw_format;
+               for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+                       if (fp_reads & FRAG_BIT_TEX(i)) {
+                               switch (VB->TexCoordPtr[i]->size) {
+                                       case 1:
+                                               format = EMIT_1F;
+                                               hw_format = R300_DATA_TYPE_FLOAT_1;
+                                               swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+                                               break;
+                                       case 2:
+                                               format = EMIT_2F;
+                                               hw_format = R300_DATA_TYPE_FLOAT_2;
+                                               swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
+                                               break;
+                                       case 3:
+                                               format = EMIT_3F;
+                                               hw_format = R300_DATA_TYPE_FLOAT_3;
+                                               swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+                                               break;
+                                       case 4:
+                                               format = EMIT_4F;
+                                               hw_format = R300_DATA_TYPE_FLOAT_4;
+                                               swiz = SWIZZLE_XYZW;
+                                               break;
+                                       default:
+                                               continue;
+                               }
+                               InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
+                               OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+                               EMIT_ATTR(_TNL_ATTRIB_TEX(i), format);
+                               ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
+                               ++first_free_tex;
+                       }
                }
        }
 
+       if (first_free_tex >= ctx->Const.MaxTextureUnits) {
+               fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
+               _mesa_exit(-1);
+       }
+
        R300_NEWPRIM(rmesa);
-       R300_STATECHANGE(rmesa, vir[0]);
-       ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
-               r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-                                  VB->AttribPtr, inputs, tab, nr);
-       R300_STATECHANGE(rmesa, vir[1]);
-       ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
-               r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
-                                  nr);
-
-       R300_STATECHANGE(rmesa, vic);
-       rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
-       rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
-
-       R300_STATECHANGE(rmesa, vof);
-       rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
-       rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
+       rmesa->vbuf.num_attribs = num_attrs;
+       *_InputsRead = InputsRead;
+       *_OutputsWritten = OutputsWritten;
+
+       RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset);
+}
+
+static void r300PrepareVertices(GLcontext *ctx)
+{
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       GLuint InputsRead, OutputsWritten;
+
+       r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten);
+       r300SetupVAP(ctx, InputsRead, OutputsWritten);
 
        rmesa->radeon.swtcl.vertex_size =
                _tnl_install_attrs( ctx,
@@ -226,31 +236,23 @@ static void r300SetVertexFormat( GLcontext *ctx )
                                    NULL, 0 );
 
        rmesa->radeon.swtcl.vertex_size /= 4;
-
-       RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-
-
-       R300_STATECHANGE(rmesa, vte);
-       rmesa->hw.vte.cmd[1] = vte;
-       rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size;
 }
 
+
 static GLuint reduced_prim[] = {
-  GL_POINTS,
-  GL_LINES,
-  GL_LINES,
-  GL_LINES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
-  GL_TRIANGLES,
+       GL_POINTS,
+       GL_LINES,
+       GL_LINES,
+       GL_LINES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
+       GL_TRIANGLES,
 };
 
 static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
-static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
-//static void r300ResetLineStipple( GLcontext *ctx );
 
 /***********************************************************************
  *                    Emit primitives as inline vertices               *
@@ -279,8 +281,6 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
    const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
 #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
 #define VERTEX r300Vertex
-#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
-#define PRINT_VERTEX(x)
 #undef TAG
 #define TAG(x) r300_##x
 #include "tnl_dd/t_dd_triemit.h"
@@ -300,9 +300,8 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
  *              Build render functions from dd templates               *
  ***********************************************************************/
 
-#define R300_TWOSIDE_BIT       0x01
-#define R300_UNFILLED_BIT      0x02
-#define R300_MAX_TRIFUNC       0x04
+#define R300_UNFILLED_BIT      0x01
+#define R300_MAX_TRIFUNC       0x02
 
 static struct {
    tnl_points_func             points;
@@ -313,9 +312,9 @@ static struct {
 
 #define DO_FALLBACK  0
 #define DO_UNFILLED (IND & R300_UNFILLED_BIT)
-#define DO_TWOSIDE  (IND & R300_TWOSIDE_BIT)
+#define DO_TWOSIDE   0
 #define DO_FLAT      0
-#define DO_OFFSET     0
+#define DO_OFFSET    0
 #define DO_TRI       1
 #define DO_QUAD      1
 #define DO_LINE      1
@@ -337,31 +336,37 @@ static struct {
 #define AREA_IS_CCW( a ) (a < 0)
 #define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
 
-/* Only used to pull back colors into vertices (ie, we know color is
- * floating point).
- */
-#define R300_COLOR( dst, src )                         \
-do {                                                   \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]);       \
+#define VERT_SET_RGBA( v, c ) \
+do { \
+   r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
 } while (0)
 
-#define VERT_SET_RGBA( v, c )    if (coloroffset) R300_COLOR( v->ub4[coloroffset], c )
-#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset]
-#define VERT_SAVE_RGBA( idx )    if (coloroffset) color[idx] = v[idx]->ui[coloroffset]
-#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+
+#define VERT_SET_SPEC( v0, c ) \
+do { \
+   if (specoffset) { \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \
+   } \
+} while (0)
 
-#define R300_SPEC( dst, src )                          \
-do {                                                   \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);       \
-   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);       \
+#define VERT_COPY_SPEC( v0, v1 ) \
+do { \
+   if (specoffset) { \
+       v0->v.specular.red = v1->v.specular.red; \
+       v0->v.specular.green = v1->v.specular.green; \
+       v0->v.specular.blue = v1->v.specular.blue; \
+   } \
 } while (0)
 
-#define VERT_SET_SPEC( v, c )    if (specoffset) R300_SPEC( v->ub4[specoffset], c )
-#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
 #define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
 #define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
 
@@ -371,7 +376,7 @@ do {                                                        \
 
 #define LOCAL_VARS(n)                                                  \
    r300ContextPtr rmesa = R300_CONTEXT(ctx);                   \
-   GLuint color[n], spec[n];                                           \
+   GLuint color[n] = { 0, }, spec[n] = { 0, };                         \
    GLuint coloroffset = rmesa->swtcl.coloroffset;      \
    GLuint specoffset = rmesa->swtcl.specoffset;                        \
    (void) color; (void) spec; (void) coloroffset; (void) specoffset;
@@ -397,26 +402,15 @@ do {                                                      \
 #define TAG(x) x
 #include "tnl_dd/t_dd_tritmp.h"
 
-#define IND (R300_TWOSIDE_BIT)
-#define TAG(x) x##_twoside
-#include "tnl_dd/t_dd_tritmp.h"
-
 #define IND (R300_UNFILLED_BIT)
 #define TAG(x) x##_unfilled
 #include "tnl_dd/t_dd_tritmp.h"
 
-#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT)
-#define TAG(x) x##_twoside_unfilled
-#include "tnl_dd/t_dd_tritmp.h"
-
-
 
 static void init_rast_tab( void )
 {
    init();
-   init_twoside();
    init_unfilled();
-   init_twoside_unfilled();
 }
 
 /**********************************************************************/
@@ -468,7 +462,6 @@ static void r300ChooseRenderState( GLcontext *ctx )
        GLuint index = 0;
        GLuint flags = ctx->_TriangleCaps;
 
-       if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
        if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
 
        if (index != rmesa->radeon.swtcl.RenderIndex) {
@@ -493,26 +486,29 @@ static void r300ChooseRenderState( GLcontext *ctx )
 }
 
 
-static void r300RenderStart(GLcontext *ctx)
+void r300RenderStart(GLcontext *ctx)
 {
-        r300ContextPtr rmesa = R300_CONTEXT( ctx );
-       //      fprintf(stderr, "%s\n", __FUNCTION__);
+       r300ContextPtr rmesa = R300_CONTEXT( ctx );
 
        r300ChooseRenderState(ctx);
-       r300SetVertexFormat(ctx);
-
-       r300ValidateTextures(ctx);
 
        r300UpdateShaders(rmesa);
+
+       r300PrepareVertices(ctx);
+
+       r300ValidateBuffers(ctx);
+
        r300UpdateShaderStates(rmesa);
 
        r300EmitCacheFlush(rmesa);
+
+       /* TODO: investigate whether the flush optimisation can be restored here */
        if (rmesa->radeon.dma.flush != NULL) {
                rmesa->radeon.dma.flush(ctx);
        }
 }
 
-static void r300RenderFinish(GLcontext *ctx)
+void r300RenderFinish(GLcontext *ctx)
 {
 }
 
@@ -521,29 +517,25 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
        if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
-               R300_NEWPRIM( rmesa );
+               R300_NEWPRIM( rmesa );
                rmesa->radeon.swtcl.hw_primitive = hwprim;
        }
 }
 
-static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
 {
 
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        rmesa->radeon.swtcl.render_primitive = prim;
 
        if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
-         return;
+               return;
 
        r300RasterPrimitive( ctx, reduced_prim[prim] );
-       //      fprintf(stderr, "%s\n", __FUNCTION__);
-
 }
 
-static void r300ResetLineStipple(GLcontext *ctx)
+void r300ResetLineStipple(GLcontext *ctx)
 {
-
-
 }
 
 void r300InitSwtcl(GLcontext *ctx)
@@ -576,22 +568,15 @@ void r300InitSwtcl(GLcontext *ctx)
 
        _tnl_invalidate_vertex_state( ctx, ~0 );
        _tnl_invalidate_vertices( ctx, ~0 );
-       RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
 
        _tnl_need_projected_coords( ctx, GL_FALSE );
-       r300ChooseRenderState(ctx);
-
-       _mesa_validate_all_lighting_tables( ctx );
-
-       tnl->Driver.NotifyMaterialChange =
-         _mesa_validate_all_lighting_tables;
 }
 
 void r300DestroySwtcl(GLcontext *ctx)
 {
 }
 
-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
+static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
 {
        BATCH_LOCALS(&rmesa->radeon);
 
@@ -599,7 +584,7 @@ void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_b
                fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
                        __FUNCTION__, vertex_size, offset);
 
-       BEGIN_BATCH(5);
+       BEGIN_BATCH(7);
        OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
        OUT_BATCH(1);
        OUT_BATCH(vertex_size | (vertex_size << 8));
@@ -607,7 +592,7 @@ void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_b
        END_BATCH();
 }
 
-void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
 {
        BATCH_LOCALS(&rmesa->radeon);
        int type, num_verts;
@@ -623,21 +608,21 @@ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
 
 void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
 {
-  r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
-  rcommonEnsureCmdBufSpace(&rmesa->radeon,
-                          rmesa->hw.max_state_size + (12*sizeof(int)),
+       rcommonEnsureCmdBufSpace(&rmesa->radeon,
+                          rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
                           __FUNCTION__);
-  r300EmitState(rmesa);
-  r300EmitVertexAOS(rmesa,
-                   rmesa->radeon.swtcl.vertex_size,
-                   rmesa->radeon.dma.current,
-                   current_offset);
-  
-  r300EmitVbufPrim(rmesa,
+       radeonEmitState(&rmesa->radeon);
+    r300_emit_scissor(ctx);
+       r300EmitVertexAOS(rmesa,
+                       rmesa->radeon.swtcl.vertex_size,
+                       rmesa->radeon.dma.current,
+                       current_offset);
+
+       r300EmitVbufPrim(rmesa,
                   rmesa->radeon.swtcl.hw_primitive,
                   rmesa->radeon.swtcl.numverts);
-  r300EmitCacheFlush(rmesa);
-  COMMIT_BATCH();
-
+       r300EmitCacheFlush(rmesa);
+       COMMIT_BATCH();
 }