r200 driver, brought over by Jon Smirl
author Keith Whitwell <keith@tungstengraphics.com>
Wed, 6 Aug 2003 17:59:57 +0000 (17:59 +0000)
committer Keith Whitwell <keith@tungstengraphics.com>
Wed, 6 Aug 2003 17:59:57 +0000 (17:59 +0000)
41 files changed:
src/mesa/drivers/dri/r200/Doxyfile [new file with mode: 0644]
src/mesa/drivers/dri/r200/Makefile.X11 [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_cmdbuf.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_context.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_context.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_ioctl.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_ioctl.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_lock.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_lock.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_maos.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_maos.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_maos_arrays.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_maos_vbtmp.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_maos_verts.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_pixel.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_pixel.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_reg.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_sanity.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_sanity.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_screen.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_screen.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_span.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_span.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_state.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_state.c~ [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_state.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_state_init.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_swtcl.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_swtcl.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_tcl.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_tcl.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_tex.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_tex.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_texmem.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_texstate.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_vtxfmt.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_vtxfmt.h [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_vtxfmt_c.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_vtxfmt_sse.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_vtxfmt_x86.c [new file with mode: 0644]
src/mesa/drivers/dri/r200/r200_vtxtmp_x86.S [new file with mode: 0644]

diff --git a/src/mesa/drivers/dri/r200/Doxyfile b/src/mesa/drivers/dri/r200/Doxyfile
new file mode 100644 (file)
index 0000000..27b3d03
--- /dev/null
@@ -0,0 +1,232 @@
+# Doxyfile 1.3.2-Gideon
+
+#---------------------------------------------------------------------------
+# General configuration options
+#---------------------------------------------------------------------------
+PROJECT_NAME           = r200
+PROJECT_NUMBER         = $VERSION$
+OUTPUT_DIRECTORY       = 
+OUTPUT_LANGUAGE        = English
+USE_WINDOWS_ENCODING   = NO
+EXTRACT_ALL            = NO
+EXTRACT_PRIVATE        = NO
+EXTRACT_STATIC         = NO
+EXTRACT_LOCAL_CLASSES  = YES
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+HIDE_FRIEND_COMPOUNDS  = NO
+HIDE_IN_BODY_DOCS      = NO
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = NO
+STRIP_FROM_PATH        = 
+INTERNAL_DOCS          = NO
+CASE_SENSE_NAMES       = YES
+SHORT_NAMES            = NO
+HIDE_SCOPE_NAMES       = NO
+SHOW_INCLUDE_FILES     = YES
+JAVADOC_AUTOBRIEF      = NO
+MULTILINE_CPP_IS_BRIEF = NO
+DETAILS_AT_TOP         = NO
+INHERIT_DOCS           = YES
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+DISTRIBUTE_GROUP_DOC   = NO
+TAB_SIZE               = 8
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+GENERATE_DEPRECATEDLIST= YES
+ALIASES                = 
+ENABLED_SECTIONS       = 
+MAX_INITIALIZER_LINES  = 30
+OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_JAVA   = NO
+SHOW_USED_FILES        = YES
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET                  = NO
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           = 
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+# Scan the directory doxygen is run from rather than an absolute path that
+# only exists on one developer's machine.
+INPUT                  = .
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.idl \
+                         *.odl \
+                         *.cs \
+                         *.C \
+                         *.H \
+                         *.tlh \
+                         *.diff \
+                         *.patch \
+                         *.moc \
+                         *.xpm
+RECURSIVE              = yes
+EXCLUDE                = 
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       = 
+EXAMPLE_PATH           = 
+EXAMPLE_PATTERNS       = *
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = 
+INPUT_FILTER           = 
+FILTER_SOURCE_FILES    = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+VERBATIM_HEADERS       = YES
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX     = NO
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          = 
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            = 
+HTML_FOOTER            = 
+HTML_STYLESHEET        = 
+HTML_ALIGN_MEMBERS     = YES
+GENERATE_HTMLHELP      = NO
+CHM_FILE               = 
+HHC_LOCATION           = 
+GENERATE_CHI           = NO
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = NO
+TREEVIEW_WIDTH         = 250
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX         = YES
+LATEX_OUTPUT           = latex
+LATEX_CMD_NAME         = latex
+MAKEINDEX_CMD_NAME     = makeindex
+COMPACT_LATEX          = NO
+PAPER_TYPE             = a4wide
+EXTRA_PACKAGES         = 
+LATEX_HEADER           = 
+PDF_HYPERLINKS         = NO
+USE_PDFLATEX           = NO
+LATEX_BATCHMODE        = NO
+LATEX_HIDE_INDICES     = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF           = NO
+RTF_OUTPUT             = rtf
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = NO
+RTF_STYLESHEET_FILE    = 
+RTF_EXTENSIONS_FILE    = 
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN           = NO
+MAN_OUTPUT             = man
+MAN_EXTENSION          = .3
+MAN_LINKS              = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML           = yes
+XML_OUTPUT             = xml
+XML_SCHEMA             = 
+XML_DTD                = 
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF   = NO
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+GENERATE_PERLMOD       = NO
+PERLMOD_LATEX          = NO
+PERLMOD_PRETTY         = YES
+PERLMOD_MAKEVAR_PREFIX = 
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+EXPAND_ONLY_PREDEF     = NO
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = 
+INCLUDE_FILE_PATTERNS  = 
+PREDEFINED             = 
+EXPAND_AS_DEFINED      = 
+SKIP_FUNCTION_MACROS   = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+TAGFILES               = 
+GENERATE_TAGFILE       = 
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = /usr/bin/perl
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS         = YES
+HIDE_UNDOC_RELATIONS   = YES
+HAVE_DOT               = NO
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+UML_LOOK               = NO
+TEMPLATE_RELATIONS     = NO
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+CALL_GRAPH             = NO
+GRAPHICAL_HIERARCHY    = YES
+DOT_IMAGE_FORMAT       = png
+DOT_PATH               = 
+DOTFILE_DIRS           = 
+MAX_DOT_GRAPH_WIDTH    = 1024
+MAX_DOT_GRAPH_HEIGHT   = 1024
+MAX_DOT_GRAPH_DEPTH    = 1000
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+SEARCHENGINE           = NO
+CGI_NAME               = search.cgi
+CGI_URL                = 
+DOC_URL                = 
+DOC_ABSPATH            = 
+BIN_ABSPATH            = /usr/local/bin/
+EXT_DOC_PATHS          = 
diff --git a/src/mesa/drivers/dri/r200/Makefile.X11 b/src/mesa/drivers/dri/r200/Makefile.X11
new file mode 100644 (file)
index 0000000..b182ba5
--- /dev/null
@@ -0,0 +1,136 @@
+# $Id: Makefile.X11,v 1.1 2003/08/06 17:59:57 keithw Exp $
+
+# Mesa 3-D graphics library
+# Version:  5.0
+# Copyright (C) 1995-2002  Brian Paul
+
+TOP = ../../../../..
+
+SHARED_INCLUDES = $(INCLUDE_DIRS) -I. -I../common -Iserver
+MINIGLX_INCLUDES = -I$(TOP)/src/miniglx 
+
+DEFINES += \
+       -D_HAVE_SWRAST=1 \
+       -D_HAVE_SWTNL=1 \
+       -D_HAVE_SANITY=1 \
+       -D_HAVE_CODEGEN=1 \
+       -D_HAVE_LIGHTING=1 \
+       -D_HAVE_TEXGEN=1 \
+       -D_HAVE_USERCLIP=1 \
+       -DGLX_DIRECT_RENDERING 
+
+MINIGLX_SOURCES = server/radeon_dri.c 
+
+DRIVER_SOURCES = r200_context.c \
+                r200_ioctl.c \
+                r200_lock.c \
+                r200_screen.c \
+                r200_state.c \
+                r200_state_init.c \
+                ../common/mm.c \
+                ../common/utils.c \
+                ../common/texmem.c \
+                ../common/vblank.c \
+                r200_cmdbuf.c \
+                r200_pixel.c \
+                r200_tex.c \
+                r200_texmem.c \
+                r200_texstate.c \
+                r200_tcl.c \
+                r200_swtcl.c \
+                r200_span.c \
+                r200_maos.c \
+                r200_sanity.c \
+                r200_vtxfmt.c \
+                r200_vtxfmt_c.c \
+                r200_vtxfmt_sse.c \
+                r200_vtxfmt_x86.c 
+
+
+INCLUDES = $(MINIGLX_INCLUDES) \
+          $(SHARED_INCLUDES)
+
+
+C_SOURCES = $(DRIVER_SOURCES) \
+           $(MINIGLX_SOURCES) 
+
+MESA_MODULES = $(TOP)/src/mesa/mesa.a
+
+
+ifeq ($(WINDOW_SYSTEM),dri)
+WINOBJ=$(MESABUILDDIR)/dri/dri.a
+WINLIB=
+else
+WINOBJ=
+WINLIB=-L$(MESA)/src/miniglx
+endif
+
+ASM_SOURCES = 
+OBJECTS = $(C_SOURCES:.c=.o) \
+         $(ASM_SOURCES:.S=.o) 
+
+# Files linked in from the sibling radeon driver by the $(SYMLINKS) rule.
+# The last entry carries no continuation backslash, so whatever follows the
+# list can never be folded into it by accident.
+SYMLINKS = \
+       server/radeon_common.h \
+       server/radeon_dri.c \
+       server/radeon_dri.h \
+       server/radeon.h \
+       server/radeon_macros.h \
+       server/radeon_reg.h \
+       server/radeon_sarea.h
+
+
+# Create each shared file as a symlink to the radeon driver's copy.  The link
+# lives in a subdirectory that nothing else in this Makefile creates, so make
+# sure it exists before linking.
+$(SYMLINKS):
+       mkdir -p $(@D)
+       rm -f $@ && ln -s ../../radeon/$@ $@
+
+
+### Include directories
+
+INCLUDE_DIRS = \
+       -I$(TOP)/include \
+       -I$(TOP)/src/mesa \
+       -I$(TOP)/src/mesa/main \
+       -I$(TOP)/src/mesa/glapi \
+       -I$(TOP)/src/mesa/math \
+       -I$(TOP)/src/mesa/transform \
+       -I$(TOP)/src/mesa/swrast \
+       -I$(TOP)/src/mesa/swrast_setup
+
+
+##### RULES #####
+
+.c.o:
+       $(CC) -c $(SHARED_INCLUDES) $(MINIGLX_INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+.S.o:
+       $(CC) -c $(SHARED_INCLUDES) $(MINIGLX_INCLUDES) $(CFLAGS) $(DEFINES)  $< -o $@
+
+
+##### TARGETS #####
+
+targets: r200_dri.so
+
+# Link the driver and install a copy into $(TOP)/lib.  The link step uses
+# $(CC), the same compiler driver the .c.o and .S.o rules use, instead of a
+# hard-coded gcc.
+r200_dri.so:  $(SYMLINKS) $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile.X11
+       rm -f $@ && $(CC) -o $@ -shared $(OBJECTS) $(MESA_MODULES) $(WINOBJ) $(WINLIB) -lc $(GL_LIB_DEPS)
+       rm -f $(TOP)/lib/r200_dri.so && \
+       install r200_dri.so $(TOP)/lib/r200_dri.so
+
+# Run 'make -f Makefile.X11 dep' to update the dependencies if you change
+# what's included by any source file.
+# makedepend rewrites the file named by -f in place, so make sure the file
+# exists before it runs.
+dep: $(C_SOURCES) $(ASM_SOURCES)
+       touch depend
+       makedepend -fdepend -Y $(SHARED_INCLUDES) \
+               $(C_SOURCES) $(ASM_SOURCES)
+
+
+# Emacs tags: index every .c/.h file below this directory plus everything
+# that find reports under ../include.
+# NOTE(review): the compile rules take headers from $(TOP)/include; confirm
+# that ../include is really the directory intended here.
+tags:
+       etags `find . -name \*.[ch]` `find ../include`
+
+
+# Remove object files, editor backup files and the built shared object.
+# Failures are ignored (leading '-').
+clean:
+       -rm -f *.o *~ *.so
+
+
+include $(TOP)/Make-config
+
+# 'depend' is produced by the dep target, so it may not exist yet.  The
+# leading '-' lets make carry on without it instead of aborting before any
+# target (including dep itself) can run.
+-include depend
diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
new file mode 100644 (file)
index 0000000..a64b32d
--- /dev/null
@@ -0,0 +1,337 @@
+/* $XFree86$ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "macros.h"
+#include "context.h"
+#include "swrast/swrast.h"
+#include "simple_list.h"
+
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_tcl.h"
+#include "r200_sanity.h"
+#include "radeon_reg.h"
+
+/* Debug helper: print a state atom's name and command size to stderr.
+ * The word-by-word dump of the atom's command array is present but is
+ * switched off by the constant 0 in its condition.
+ */
+static void print_state_atom( struct r200_state_atom *state )
+{
+   int word;
+
+   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
+
+   /* The leading "0 &" keeps this dump permanently disabled. */
+   if (0 & R200_DEBUG & DEBUG_VERBOSE) {
+      for (word = 0 ; word < state->cmd_size ; word++) {
+         fprintf(stderr, "\t%s[%d]: %x\n",
+                 state->name, word, state->cmd[word]);
+      }
+   }
+}
+
+/* Walk a list of state atoms.  Every atom accepted by its check() callback
+ * has its command words copied into freshly allocated command-buffer space
+ * and is then moved to the head of rmesa->hw.clean.  Atoms that check()
+ * rejects are left where they are.
+ */
+static void r200_emit_state_list( r200ContextPtr rmesa,
+                                   struct r200_state_atom *list )
+{
+   struct r200_state_atom *atom, *next;
+   char *out;
+
+   foreach_s( atom, next, list ) {
+      if (!atom->check( rmesa->glCtx, atom->idx )) {
+         if (R200_DEBUG & DEBUG_STATE)
+            fprintf(stderr, "skip state %s\n", atom->name);
+      }
+      else {
+         /* cmd_size is scaled by 4 to get the byte count. */
+         out = r200AllocCmdBuf( rmesa, atom->cmd_size * 4, __FUNCTION__);
+         memcpy( out, atom->cmd, atom->cmd_size * 4);
+         move_to_head( &(rmesa->hw.clean), atom );
+         if (R200_DEBUG & DEBUG_STATE)
+            print_state_atom( atom );
+      }
+   }
+}
+
+
+/* Emit everything on the dirty state list into the command buffer.
+ *
+ * If the context was lost, every atom on the clean list is first moved back
+ * onto the dirty list so the whole state is emitted again, and the
+ * lost_context flag is cleared.  Otherwise only hw.mtl[0] is forced onto
+ * the dirty list before emission.
+ */
+void r200EmitState( r200ContextPtr rmesa )
+{
+   struct r200_state_atom *atom, *next;
+
+   if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (!rmesa->lost_context) {
+      /* odd bug? -- isosurf, cycle between reflect & lit */
+      move_to_tail( &rmesa->hw.dirty, &rmesa->hw.mtl[0] );
+   }
+   else {
+      /* Somewhat overkill: re-dirty every clean atom. */
+      if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
+         fprintf(stderr, "%s - lost context\n", __FUNCTION__);
+
+      foreach_s( atom, next, &(rmesa->hw.clean) )
+         move_to_tail(&(rmesa->hw.dirty), atom );
+
+      rmesa->lost_context = 0;
+   }
+
+   r200_emit_state_list( rmesa, &rmesa->hw.dirty );
+}
+
+
+
+/* Fire a section of the retained (indexed_verts) buffer as a regular
+ * primitive.
+ *
+ * Emits pending state first, then appends a three-word PACKET3_CLIP command:
+ * the command header, the DRAW_VBUF_2 opcode, and a control word combining
+ * the primitive type, the list-walk and RGBA colour-order flags, and
+ * vertex_nr in the vertex-number field.  primitive must not have
+ * R200_VF_PRIM_WALK_IND set (asserted).
+ */
+extern void r200EmitVbufPrim( r200ContextPtr rmesa,
+                               GLuint primitive,
+                               GLuint vertex_nr )
+{
+   drmRadeonCmdHeader *pkt;
+
+   assert(!(primitive & R200_VF_PRIM_WALK_IND));
+
+   r200EmitState( rmesa );
+
+   if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
+      fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
+              rmesa->store.cmd_used/4, primitive, vertex_nr);
+
+   pkt = (drmRadeonCmdHeader *)r200AllocCmdBuf( rmesa, 3 * sizeof(*pkt),
+                                                 __FUNCTION__ );
+
+   /* Word 0: command header, cleared before the type is set. */
+   pkt[0].i = 0;
+   pkt[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+
+   /* Word 1: draw opcode. */
+   pkt[1].i = R200_CP_CMD_3D_DRAW_VBUF_2;
+
+   /* Word 2: vertex count, primitive type and walk/colour-order flags. */
+   pkt[2].i = ((vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT) |
+               R200_VF_COLOR_ORDER_RGBA |
+               R200_VF_PRIM_WALK_LIST |
+               primitive);
+}
+
+
+/* Close the element packet that starts at store.elts_start in the command
+ * buffer.
+ *
+ * Clears the dma.flush hook (which must currently be this function), pads
+ * store.cmd_used, then ORs the packet's size into word 1 and the element
+ * count into word 2 of the packet.  With DEBUG_SYNC set it also calls
+ * r200Finish().
+ */
+void r200FlushElts( r200ContextPtr rmesa )
+{
+   int *pkt = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
+   int pkt_dwords;
+   /* Bytes written past the 12-byte packet header, two bytes per element.
+    * Taken here, before cmd_used is padded below.
+    */
+   int nr_elts = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2;
+
+   if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert( rmesa->dma.flush == r200FlushElts );
+   rmesa->dma.flush = 0;
+
+   /* Cope with odd number of elts: a value that is 2 mod 4 is bumped to the
+    * next multiple of 4, a multiple of 4 is left unchanged.
+    * NOTE(review): this relies on cmd_used being even here -- confirm.
+    */
+   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
+   pkt_dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
+
+   pkt[1] |= (pkt_dwords - 3) << 16;
+   pkt[2] |= nr_elts << R200_VF_VERTEX_NUMBER_SHIFT;
+
+   if (R200_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+      r200Finish( rmesa->glCtx );
+   }
+}
+
+
+/* Start an indexed-primitive packet whose element list is filled in later.
+ *
+ * Emits pending state, allocates 12 bytes of packet header plus two bytes
+ * for each of min_nr elements, writes the header, and installs
+ * r200FlushElts as the dma.flush hook.  The packet's byte offset within
+ * store.cmd_buf is recorded in store.elts_start.
+ *
+ * primitive must have R200_VF_PRIM_WALK_IND set, and no flush hook may be
+ * installed already (both asserted).
+ *
+ * Returns a pointer to the element area immediately after the three header
+ * words.
+ */
+GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+                                   GLuint primitive,
+                                   GLuint min_nr )
+{
+   drmRadeonCmdHeader *pkt;
+
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
+
+   assert((primitive & R200_VF_PRIM_WALK_IND));
+
+   r200EmitState( rmesa );
+
+   pkt = (drmRadeonCmdHeader *)r200AllocCmdBuf( rmesa,
+                                               12 + min_nr*2,
+                                               __FUNCTION__ );
+
+   /* Word 0: command header, cleared before the type is set. */
+   pkt[0].i = 0;
+   pkt[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+
+   /* Word 1: draw opcode.  Word 2: primitive type and flags. */
+   pkt[1].i = R200_CP_CMD_3D_DRAW_INDX_2;
+   pkt[2].i = (R200_VF_COLOR_ORDER_RGBA |
+               R200_VF_PRIM_WALK_IND |
+               primitive);
+
+   if (R200_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x prim %x \n",
+              __FUNCTION__,
+              pkt[1].i, primitive);
+
+   assert(!rmesa->dma.flush);
+   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   rmesa->dma.flush = r200FlushElts;
+
+   rmesa->store.elts_start = ((char *)pkt) - rmesa->store.cmd_buf;
+
+   return (GLushort *)(pkt+3);
+}
+
+
+
+/* Emit a LOAD_VBPNTR packet that describes a single vertex array.
+ *
+ * The packet is five words: command header, opcode with a payload count of
+ * 2, the array count (1), one word carrying vertex_size in both bits 0-7
+ * and bits 8 and up, and the array offset.
+ */
+void r200EmitVertexAOS( r200ContextPtr rmesa,
+                         GLuint vertex_size,
+                         GLuint offset )
+{
+   drmRadeonCmdHeader *cmd;
+
+   if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
+             __FUNCTION__, vertex_size, offset);
+
+   cmd = (drmRadeonCmdHeader *)r200AllocCmdBuf( rmesa, 5 * sizeof(int),
+                                                 __FUNCTION__ );
+
+   /* Clear the whole header word before setting the type, as the other
+    * packet emitters in this file do, so the rest of the word does not keep
+    * whatever the buffer held before.
+    */
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16);
+   cmd[2].i = 1;
+   cmd[3].i = vertex_size | (vertex_size << 8);
+   cmd[4].i = offset;
+}
+                      
+
+/* Emit a LOAD_VBPNTR packet describing nr vertex arrays.
+ *
+ * After the three leading words (header, opcode, array count) the arrays
+ * are packed in pairs, three words per pair: one word shared by both arrays
+ * (even array: size in bits 0 and up, stride in bits 8 and up; odd array:
+ * size in bits 16 and up, stride in bits 24 and up), then one address word
+ * for each array.  A trailing unpaired array uses two words.
+ *
+ * Each address is the array's aos_start plus offset * aos_stride * 4.
+ * With DEBUG_VERTS set the finished packet is dumped to stderr.
+ */
+void r200EmitAOS( r200ContextPtr rmesa,
+                   struct r200_dma_region **component,
+                   GLuint nr,
+                   GLuint offset )
+{
+   drmRadeonCmdHeader *slot;
+   int sz = 3 + ((nr/2)*3) + ((nr&1)*2);
+   int i;
+   int *dump;
+
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr);
+
+   slot = (drmRadeonCmdHeader *)r200AllocCmdBuf( rmesa, sz * sizeof(int),
+                                                  __FUNCTION__ );
+   slot[0].i = 0;
+   slot[0].header.cmd_type = RADEON_CMD_PACKET3;
+   slot[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | ((sz-3) << 16);
+   slot[2].i = nr;
+
+   /* Remember the packet start for the debug dump, then step past the
+    * three leading words to the first pair.
+    */
+   dump = &slot[0].i;
+   slot += 3;
+
+   for (i = 0 ; i < nr ; i++) {
+      if (!(i & 1)) {
+         /* Even array: starts a new pair. */
+         slot[0].i = ((component[i]->aos_stride << 8) |
+                      (component[i]->aos_size << 0));
+         slot[1].i = (component[i]->aos_start +
+                      offset * component[i]->aos_stride * 4);
+      }
+      else {
+         /* Odd array: completes the pair, then move on to the next one. */
+         slot[0].i |= ((component[i]->aos_stride << 24) |
+                       (component[i]->aos_size << 16));
+         slot[2].i = (component[i]->aos_start +
+                      offset * component[i]->aos_stride * 4);
+         slot += 3;
+      }
+   }
+
+   if (R200_DEBUG & DEBUG_VERTS) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      for (i = 0 ; i < sz ; i++)
+         fprintf(stderr, "   %d: %x\n", i, dump[i]);
+   }
+}
+
+/* Emit a BITBLT_MULTI packet that copies a w x h rectangle from
+ * (srcx, srcy) in the source surface to (dstx, dsty) in the destination
+ * surface.
+ *
+ * Asserted preconditions: both pitches are multiples of 64, both offsets
+ * are multiples of 1024, and w and h are below 65536.  The pitches are
+ * stored divided by 64 and the offsets shifted right by 10.
+ */
+void r200EmitBlit( r200ContextPtr rmesa,
+                  GLuint color_fmt,
+                  GLuint src_pitch,
+                  GLuint src_offset,
+                  GLuint dst_pitch,
+                  GLuint dst_offset,
+                  GLint srcx, GLint srcy,
+                  GLint dstx, GLint dsty,
+                  GLuint w, GLuint h )
+{
+   drmRadeonCmdHeader *cmd;
+
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+             __FUNCTION__, 
+             src_pitch, src_offset, srcx, srcy,
+             dst_pitch, dst_offset, dstx, dsty,
+             w, h);
+
+   assert( (src_pitch & 63) == 0 );
+   assert( (dst_pitch & 63) == 0 );
+   assert( (src_offset & 1023) == 0 );
+   assert( (dst_offset & 1023) == 0 );
+   assert( w < (1<<16) );
+   assert( h < (1<<16) );
+
+   cmd = (drmRadeonCmdHeader *)r200AllocCmdBuf( rmesa, 8 * sizeof(int),
+                                                 __FUNCTION__ );
+
+   /* Clear the whole header word before setting the type, as the other
+    * packet emitters in this file do, so the rest of the word does not keep
+    * whatever the buffer held before.
+    */
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16);
+   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+              RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+              RADEON_GMC_BRUSH_NONE |
+              (color_fmt << 8) |
+              RADEON_GMC_SRC_DATATYPE_COLOR |
+              RADEON_ROP3_S |
+              RADEON_DP_SRC_SOURCE_MEMORY |
+              RADEON_GMC_CLR_CMP_CNTL_DIS |
+              RADEON_GMC_WR_MSK_DIS );
+
+   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);   /* src pitch/offset */
+   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);   /* dst pitch/offset */
+   cmd[5].i = (srcx << 16) | srcy;                         /* src x,y */
+   cmd[6].i = (dstx << 16) | dsty;                         /* dst x,y */
+   cmd[7].i = (w << 16) | h;                               /* width,height */
+}
+
+
+/* Append a one-word WAIT command carrying the given flags.
+ *
+ * Nothing is emitted when dri.drmMinor is below 6.  flags may contain only
+ * RADEON_WAIT_2D and/or RADEON_WAIT_3D (asserted).
+ */
+void r200EmitWait( r200ContextPtr rmesa, GLuint flags )
+{
+   drmRadeonCmdHeader *pkt;
+
+   if (rmesa->dri.drmMinor < 6)
+      return;
+
+   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
+
+   pkt = (drmRadeonCmdHeader *)r200AllocCmdBuf( rmesa, 1 * sizeof(int),
+                                                 __FUNCTION__ );
+   pkt[0].i = 0;
+   pkt[0].wait.cmd_type = RADEON_CMD_WAIT;
+   pkt[0].wait.flags = flags;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
new file mode 100644 (file)
index 0000000..c5f23ef
--- /dev/null
@@ -0,0 +1,595 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "api_arrayelt.h"
+#include "context.h"
+#include "simple_list.h"
+#include "imports.h"
+#include "matrix.h"
+#include "extensions.h"
+#include "state.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "array_cache/acache.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_span.h"
+#include "r200_pixel.h"
+#include "r200_tex.h"
+#include "r200_swtcl.h"
+#include "r200_tcl.h"
+#include "r200_vtxfmt.h"
+#include "r200_maos.h"
+
+#define DRIVER_DATE    "20030328"
+
+#include "vblank.h"
+#include "utils.h"
+#ifndef R200_DEBUG
+int R200_DEBUG = (0);
+#endif
+
+
+
+/* Report the dimensions of the drawable bound to the current context.
+ *
+ * The `buffer' argument is not consulted: the size always comes from
+ * rmesa->dri.drawable, read while the hardware lock is held.
+ */
+static void r200GetBufferSize( GLframebuffer *buffer,
+                              GLuint *width, GLuint *height )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   LOCK_HARDWARE( rmesa );
+   {
+      const __DRIdrawablePrivate *draw = rmesa->dri.drawable;
+
+      *width  = draw->w;
+      *height = draw->h;
+   }
+   UNLOCK_HARDWARE( rmesa );
+}
+
+/* glGetString() backend for the driver-specific strings.
+ *
+ * GL_VENDOR   -> constant vendor string.
+ * GL_RENDERER -> string built by driGetRendererString() with a " TCL" or
+ *                " NO-TCL" suffix appended, depending on whether the
+ *                TCL-disable fallback bit is set.  The result lives in a
+ *                function-static buffer that is overwritten on each call.
+ * Any other name returns NULL.
+ */
+static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   static char buffer[128];
+   unsigned   offset;
+   GLuint agp_mode;
+
+   /* PCI cards are reported with an AGP mode of 0. */
+   if ( rmesa->r200Screen->IsPCI )
+      agp_mode = 0;
+   else
+      agp_mode = rmesa->r200Screen->AGPMode;
+
+   if ( name == GL_VENDOR )
+      return (GLubyte *)"Tungsten Graphics, Inc.";
+
+   if ( name == GL_RENDERER ) {
+      const char *tcl_prefix =
+         (rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE) ? "NO-" : "";
+
+      offset = driGetRendererString( buffer, "R200", DRIVER_DATE,
+                                    agp_mode );
+
+      /* Append the TCL marker right after the generic renderer text. */
+      sprintf( & buffer[ offset ], " %sTCL", tcl_prefix );
+
+      return (GLubyte *)buffer;
+   }
+
+   return NULL;
+}
+
+
+/* Extension strings exported by the R200 driver.
+ *
+ * NULL-terminated list handed to driInitExtensions() from
+ * r200CreateContext().  GL_ARB_texture_cube_map is deliberately absent:
+ * r200CreateContext() enables it separately, and only when the screen
+ * reports drmSupportsCubeMaps.
+ */
+static const char * const card_extensions[] =
+{
+    "GL_ARB_multisample",
+    "GL_ARB_multitexture",
+    "GL_ARB_texture_border_clamp",
+    "GL_ARB_texture_compression",
+    "GL_ARB_texture_env_add",
+    "GL_ARB_texture_env_combine",
+    "GL_ARB_texture_env_dot3",
+    "GL_ARB_texture_mirrored_repeat",
+    "GL_EXT_blend_logic_op",
+    "GL_EXT_blend_minmax",
+    "GL_EXT_blend_subtract",
+    "GL_EXT_secondary_color",
+    "GL_EXT_stencil_wrap",
+    "GL_EXT_texture_edge_clamp",
+    "GL_EXT_texture_env_add",
+    "GL_EXT_texture_env_combine",
+    "GL_EXT_texture_env_dot3",
+    "GL_EXT_texture_filter_anisotropic",
+    "GL_EXT_texture_lod_bias",
+    "GL_ATI_texture_env_combine3",
+    "GL_ATI_texture_mirror_once",
+    "GL_IBM_texture_mirrored_repeat",
+    "GL_MESA_pack_invert",
+    "GL_MESA_ycbcr_texture",
+    "GL_NV_blend_square",
+    "GL_NV_texture_rectangle",
+    "GL_SGIS_generate_mipmap",
+    "GL_SGIS_texture_border_clamp",
+    "GL_SGIS_texture_edge_clamp",
+    NULL
+};
+
+extern const struct gl_pipeline_stage _r200_render_stage;
+extern const struct gl_pipeline_stage _r200_tcl_stage;
+
+/* The driver's TNL pipeline: a 0-terminated list of stages that
+ * r200CreateContext() installs with _tnl_install_pipeline() in place of
+ * the default pipeline.
+ */
+static const struct gl_pipeline_stage *r200_pipeline[] = {
+
+   /* Try and go straight to t&l
+    */
+   &_r200_tcl_stage,  
+
+   /* Catch any t&l fallbacks
+    */
+   &_tnl_vertex_transform_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+
+   /* Try again to go to tcl? 
+    *     - no good for asymmetric-twoside (do with multipass)
+    *     - no good for asymmetric-unfilled (do with multipass)
+    *     - good for material
+    *     - good for texgen
+    *     - need to manipulate a bit of state
+    *
+    * - worth it/not worth it?
+    */
+                       
+   /* Else do them here.
+    */
+/*    &_r200_render_stage,  */ /* FIXME: bugs with ut2003 */
+   &_tnl_render_stage,         /* FALLBACK:  */
+   0,                          /* list terminator */
+};
+
+
+
+/* Install the miscellaneous Driver hooks owned by this file.
+ *
+ * Hooks set to NULL here are explicitly left without a driver
+ * implementation.
+ */
+static void r200InitDriverFuncs( GLcontext *ctx )
+{
+    /* Hooks with no driver implementation. */
+    ctx->Driver.Bitmap                 = NULL;
+    ctx->Driver.DrawPixels             = NULL;
+    ctx->Driver.Error                  = NULL;
+
+    /* Hooks backed by real functions. */
+    ctx->Driver.GetString              = r200GetString;
+    ctx->Driver.GetBufferSize          = r200GetBufferSize;
+    ctx->Driver.ResizeBuffers          = _swrast_alloc_buffers;
+}
+
+/* Maps the tokens accepted in the R200_DEBUG / RADEON_DEBUG environment
+ * variables to DEBUG_* flag bits.  The table is passed to
+ * driParseDebugString() in r200CreateContext() (only when DO_DEBUG is set)
+ * and is terminated by a { NULL, 0 } entry.
+ */
+static const struct dri_debug_control debug_control[] =
+{
+    { "fall",  DEBUG_FALLBACKS },
+    { "tex",   DEBUG_TEXTURE },
+    { "ioctl", DEBUG_IOCTL },
+    { "prim",  DEBUG_PRIMS },
+    { "vert",  DEBUG_VERTS },
+    { "state", DEBUG_STATE },
+    { "code",  DEBUG_CODEGEN },
+    { "vfmt",  DEBUG_VFMT },
+    { "vtxf",  DEBUG_VFMT },      /* second spelling for the same flag */
+    { "verb",  DEBUG_VERBOSE },
+    { "dri",   DEBUG_DRI },
+    { "dma",   DEBUG_DMA },
+    { "san",   DEBUG_SANITY },
+    { "sync",  DEBUG_SYNC },
+    { "pix",   DEBUG_PIXEL },
+    { "mem",   DEBUG_MEMORY },
+    { NULL,    0 }
+};
+
+
+/* Stand-in used by r200CreateContext() when "__glXGetUST" cannot be
+ * resolved: stores the constant 1 through `ust' and reports success (0).
+ */
+static int
+get_ust_nop( int64_t * ust )
+{
+   ust[0] = 1;
+
+   return 0;
+}
+
+
+/* Create the device specific context.
+ *
+ * Allocates the r200 context record and its Mesa context, mirrors the DRI
+ * screen/context information into it, creates the texture heap(s), sets
+ * the implementation limits, brings up the swrast/ac/tnl/swsetup/ae helper
+ * modules, installs the driver pipeline and hooks, and finally applies the
+ * R200_* environment-variable overrides.
+ *
+ * glVisual             - visual handed to _mesa_create_context().
+ * driContextPriv       - DRI context; its driverPrivate is set to the new
+ *                        r200 context.
+ * sharedContextPrivate - optional r200 context whose Mesa context is used
+ *                        as the share context, or NULL.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if either the r200 context or the
+ * Mesa context could not be allocated.
+ */
+GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+                            __DRIcontextPrivate *driContextPriv,
+                            void *sharedContextPrivate)
+{
+   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+   r200ScreenPtr screen = (r200ScreenPtr)(sPriv->private);
+   r200ContextPtr rmesa;
+   GLcontext *ctx, *shareCtx;
+   int i;
+
+   assert(glVisual);
+   assert(driContextPriv);
+   assert(screen);
+
+   /* Allocate the R200 context */
+   rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) );
+   if ( !rmesa )
+      return GL_FALSE;
+
+   /* Allocate the Mesa context */
+   if (sharedContextPrivate)
+      shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx;
+   else
+      shareCtx = NULL;
+   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, (void *) rmesa, GL_TRUE);
+   if (!rmesa->glCtx) {
+      FREE(rmesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = rmesa;
+
+   /* Init r200 context data */
+   rmesa->dri.context = driContextPriv;
+   rmesa->dri.screen = sPriv;
+   rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */
+   rmesa->dri.hwContext = driContextPriv->hHWContext;
+   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
+   rmesa->dri.fd = sPriv->fd;
+   rmesa->dri.drmMinor = sPriv->drmMinor;
+
+   rmesa->r200Screen = screen;
+   /* The driver-private SAREA lives at a screen-supplied byte offset
+    * inside the shared SAREA.
+    */
+   rmesa->sarea = (RADEONSAREAPrivPtr)((GLubyte *)sPriv->pSAREA +
+                                      screen->sarea_priv_offset);
+
+
+   rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address;
+
+   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
+   make_empty_list( & rmesa->swapped );
+
+   /* Only one texture heap is created; the screen's heap count is
+    * intentionally not used here (see the commented-out expression).
+    */
+   rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ;
+   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
+           screen->texSize[i],
+           12,
+           RADEON_NR_TEX_REGIONS,
+           rmesa->sarea->texList[i],
+           & rmesa->sarea->texAge[i],
+           & rmesa->swapped,
+           sizeof( r200TexObj ),
+           (destroy_texture_object_t *) r200DestroyTexObj );
+   }
+
+   rmesa->swtcl.RenderIndex = ~0;
+   rmesa->lost_context = 1;
+
+   /* Set the maximum texture size small enough that we can guarantee that
+    * all texture units can bind a maximal texture and have them both in
+    * texturable memory at once.
+    */
+
+   ctx = rmesa->glCtx;
+   ctx->Const.MaxTextureUnits = 2;
+
+   driCalculateMaxTextureLevels( rmesa->texture_heaps,
+                                rmesa->nr_heaps,
+                                & ctx->Const,
+                                4,
+                                11, /* max 2D texture size is 2048x2048 */
+#if ENABLE_HW_3D_TEXTURE
+                                8,  /* max 3D texture size is 256^3 */
+#else
+                                0,  /* 3D textures unsupported */
+#endif
+                                11, /* max cube texture size is 2048x2048 */
+                                11, /* max texture rectangle size is 2048x2048 */
+                                12,
+                                GL_FALSE );
+
+   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+   /* No wide points.
+    */
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 1.0;
+   ctx->Const.MaxPointSizeAA = 1.0;
+
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 10.0;
+   ctx->Const.MaxLineWidthAA = 10.0;
+   ctx->Const.LineWidthGranularity = 0.0625;
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _ac_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+   _ae_create_context( ctx );
+
+   /* Install the customized pipeline:
+    */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, r200_pipeline );
+   ctx->Driver.FlushVertices = r200FlushVertices;
+
+   /* Try and keep materials and vertices separate:
+    */
+   _tnl_isolate_materials( ctx, GL_TRUE );
+
+
+   /* Configure swrast to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+
+
+   /* Construct the driver-private matrices and start them as identity. */
+   _math_matrix_ctr( &rmesa->TexGenMatrix[0] );
+   _math_matrix_ctr( &rmesa->TexGenMatrix[1] );
+   _math_matrix_ctr( &rmesa->tmpmat );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[0] );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[1] );
+   _math_matrix_set_identity( &rmesa->tmpmat );
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+   /* Cube maps are advertised only when the screen says the DRM supports
+    * them.
+    */
+   if (rmesa->r200Screen->drmSupportsCubeMaps)
+      _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
+
+   r200InitDriverFuncs( ctx );
+   r200InitIoctlFuncs( ctx );
+   r200InitStateFuncs( ctx );
+   r200InitSpanFuncs( ctx );
+   r200InitPixelFuncs( ctx );
+   r200InitTextureFuncs( ctx );
+   r200InitState( rmesa );
+   r200InitSwtcl( ctx );
+
+   /* IRQs are used only with DRM minor >= 6, when R200_NO_IRQS is unset
+    * and when the screen reports an irq.
+    */
+   rmesa->iw.irq_seq = -1;
+   rmesa->irqsEmitted = 0;
+   rmesa->do_irqs = (rmesa->dri.drmMinor >= 6 && 
+                    !getenv("R200_NO_IRQS") &&
+                    rmesa->r200Screen->irq);
+
+   if (!rmesa->do_irqs)
+      fprintf(stderr, 
+             "IRQ's not enabled, falling back to busy waits: %d %d %d\n",
+             rmesa->dri.drmMinor,
+             !!getenv("R200_NO_IRQS"),
+             rmesa->r200Screen->irq);
+
+
+   rmesa->do_usleeps = !getenv("R200_NO_USLEEPS");
+
+   rmesa->vblank_flags = (rmesa->do_irqs)
+       ? driGetDefaultVBlankFlags() : VBLANK_FLAG_NO_IRQ;
+
+   rmesa->prefer_agp_client_texturing = 
+      (getenv("R200_AGP_CLIENT_TEXTURES") != 0);
+   
+#ifndef _SOLO
+   /* Look up the UST query entry point; fall back to a stub that always
+    * reports 1 when it is unavailable.
+    */
+   rmesa->get_ust = (PFNGLXGETUSTPROC) glXGetProcAddress( "__glXGetUST" );
+   if ( rmesa->get_ust == NULL ) {
+      rmesa->get_ust = get_ust_nop;
+   }
+
+   (*rmesa->get_ust)( & rmesa->swap_ust );
+#endif
+
+#if DO_DEBUG
+   /* Both variables are honoured; their flag sets are OR-ed together. */
+   R200_DEBUG  = driParseDebugString( getenv( "R200_DEBUG" ),
+                                     debug_control );
+   R200_DEBUG |= driParseDebugString( getenv( "RADEON_DEBUG" ),
+                                     debug_control );
+#endif
+
+   /* R200_NO_RAST takes precedence over R200_NO_TCL; the vtxfmt module is
+    * initialised only when neither is set and R200_NO_VTXFMT is unset.
+    */
+   if (getenv("R200_NO_RAST")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1); 
+   }
+   else if (getenv("R200_NO_TCL")) {
+      fprintf(stderr, "disabling TCL support\n");
+      TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1); 
+   }
+   else {
+      if (!getenv("R200_NO_VTXFMT")) {
+        r200VtxfmtInit( ctx );
+      }
+      _tnl_need_dlist_norm_lengths( ctx, GL_FALSE );
+   }
+   return GL_TRUE;
+}
+
+
+/* Destroy the Mesa and driver specific context data.
+ *
+ * If the context being destroyed is the currently bound one, pending
+ * vertices are fired and the context is unbound first.  The helper
+ * modules, DMA region, vtxfmt state, Mesa context, cliprects and (when
+ * this was the last user of the share group) the texture heaps are then
+ * released, followed by the r200 context record itself.
+ *
+ * driContextPriv - DRI context whose driverPrivate is the r200 context.
+ */
+void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate;
+   r200ContextPtr current = ctx ? R200_CONTEXT(ctx) : NULL;
+
+   assert(rmesa); /* should never be null */
+
+   /* check if we're deleting the currently bound context; the NULL test
+    * keeps a release build from firing vertices on a NULL context when no
+    * context is current either.
+    */
+   if (rmesa && rmesa == current) {
+      R200_FIREVERTICES( rmesa );
+      _mesa_make_current2(NULL, NULL, NULL);
+   }
+
+   /* Free r200 context resources */
+   if ( rmesa ) {
+      GLboolean   release_texture_heaps;
+
+
+      /* Sample the share-group refcount before the Mesa context goes away. */
+      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
+      _swsetup_DestroyContext( rmesa->glCtx );
+      _tnl_DestroyContext( rmesa->glCtx );
+      _ac_DestroyContext( rmesa->glCtx );
+      _swrast_DestroyContext( rmesa->glCtx );
+
+      r200DestroySwtcl( rmesa->glCtx );
+      r200ReleaseArrays( rmesa->glCtx, ~0 );
+
+      if (rmesa->dma.current.buf) {
+        r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+        r200FlushCmdBuf( rmesa, __FUNCTION__ );
+      }
+
+      /* Tear down vtxfmt unless TCL was disabled.  The parentheses matter:
+       * `!x & FLAG' parses as `(!x) & FLAG', which tests the wrong thing.
+       * NOTE(review): with R200_NO_RAST set, r200CreateContext() skips
+       * r200VtxfmtInit() without setting this bit - confirm that
+       * r200VtxfmtDestroy() tolerates that case.
+       */
+      if (!(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE))
+        if (!getenv("R200_NO_VTXFMT"))
+           r200VtxfmtDestroy( rmesa->glCtx );
+
+      /* free the Mesa context */
+      rmesa->glCtx->DriverCtx = NULL;
+      _mesa_destroy_context( rmesa->glCtx );
+
+      if (rmesa->state.scissor.pClipRects) {
+        FREE(rmesa->state.scissor.pClipRects);
+        rmesa->state.scissor.pClipRects = 0;
+      }
+
+      if ( release_texture_heaps ) {
+         /* This share group is about to go away, free our private
+          * texture object data.
+          */
+         int i;
+
+        assert( is_empty_list( & rmesa->swapped ) );
+
+         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+           driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
+           rmesa->texture_heaps[ i ] = NULL;
+         }
+      }
+
+      FREE( rmesa );
+   }
+}
+
+
+
+
+/* Present the back buffer of `dPriv'.
+ *
+ * For double-buffered visuals Mesa is notified (flushing pending
+ * rendering) and the swap is done by page flip or buffer copy, depending
+ * on rmesa->doPageFlip.  Single-buffered visuals are a no-op.  A drawable
+ * without a bound context is reported through _mesa_problem().
+ */
+void
+r200SwapBuffers( __DRIdrawablePrivate *dPriv )
+{
+   r200ContextPtr rmesa;
+   GLcontext *ctx;
+
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+      return;
+   }
+
+   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+   ctx = rmesa->glCtx;
+
+   /* Nothing to swap for a single-buffered visual. */
+   if (!ctx->Visual.doubleBufferMode)
+      return;
+
+   _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+
+   if ( rmesa->doPageFlip )
+      r200PageFlip( dPriv );
+   else
+      r200CopyBuffer( dPriv );
+}
+
+
+/* Make the context in `driContextPriv' current and bind it to the draw and
+ * read drawables; a NULL `driContextPriv' unbinds the current context.
+ *
+ * Always returns GL_TRUE.
+ */
+GLboolean
+r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
+                   __DRIdrawablePrivate *driDrawPriv,
+                   __DRIdrawablePrivate *driReadPriv )
+{
+   if ( !driContextPriv ) {
+      if (R200_DEBUG & DEBUG_DRI)
+         fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+      _mesa_make_current( 0, 0 );
+   }
+   else {
+      r200ContextPtr newCtx =
+         (r200ContextPtr) driContextPriv->driverPrivate;
+      GLcontext *glCtx = newCtx->glCtx;
+
+      if (R200_DEBUG & DEBUG_DRI)
+         fprintf(stderr, "%s ctx %p\n", __FUNCTION__, glCtx);
+
+      /* A different drawable means the window-dependent state has to be
+       * refreshed.
+       */
+      if ( newCtx->dri.drawable != driDrawPriv ) {
+         newCtx->dri.drawable = driDrawPriv;
+         r200UpdateWindow( glCtx );
+         r200UpdateViewportOffset( glCtx );
+      }
+
+      _mesa_make_current2( glCtx,
+                           (GLframebuffer *) driDrawPriv->driverPrivate,
+                           (GLframebuffer *) driReadPriv->driverPrivate );
+
+      /* A zero viewport width is treated as "never set": default it to
+       * the full drawable.
+       */
+      if ( !glCtx->Viewport.Width ) {
+         _mesa_set_viewport( glCtx, 0, 0,
+                             driDrawPriv->w, driDrawPriv->h );
+      }
+
+      if (newCtx->vb.enabled)
+         r200VtxfmtMakeCurrent( glCtx );
+
+      _mesa_update_state( glCtx );
+      r200ValidateState( glCtx );
+   }
+
+   if (R200_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "End %s\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+/* Unbind the context in `driContextPriv' from its buffer.
+ *
+ * The only driver-side work is to let the vtxfmt module detach from the
+ * Mesa context.  Always returns GL_TRUE.
+ */
+GLboolean
+r200UnbindContext( __DRIcontextPrivate *driContextPriv )
+{
+   GLcontext *glCtx = ((r200ContextPtr) driContextPriv->driverPrivate)->glCtx;
+
+   if (R200_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, glCtx);
+
+   r200VtxfmtUnbindContext( glCtx );
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
new file mode 100644 (file)
index 0000000..3802dee
--- /dev/null
@@ -0,0 +1,926 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_CONTEXT_H__
+#define __R200_CONTEXT_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include <inttypes.h>
+#include "dri_util.h"
+#include "radeon_common.h"
+#include "texmem.h"
+
+#include "macros.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "r200_reg.h"
+
+#define ENABLE_HW_3D_TEXTURE 0  /* XXX this is temporary! */
+
+struct r200_context;
+typedef struct r200_context r200ContextRec;
+typedef struct r200_context *r200ContextPtr;
+
+#include "r200_lock.h"
+#include "r200_screen.h"
+#include "mm.h"
+
+/* Flags for software fallback cases */
+/* See corresponding strings in r200_swtcl.c */
+#define R200_FALLBACK_TEXTURE           0x1
+#define R200_FALLBACK_DRAW_BUFFER       0x2
+#define R200_FALLBACK_STENCIL           0x4
+#define R200_FALLBACK_RENDER_MODE       0x8
+#define R200_FALLBACK_BLEND_EQ          0x10
+#define R200_FALLBACK_BLEND_FUNC        0x20
+#define R200_FALLBACK_DISABLE           0x40
+#define R200_FALLBACK_BORDER_MODE       0x80
+
+/* The blit width for texture uploads
+ */
+#define BLIT_WIDTH_BYTES 1024
+
+/* Use the templated vertex format:
+ */
+#define COLOR_IS_RGBA
+#define TAG(x) r200##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+/* Function-pointer types for per-primitive callbacks taking the context
+ * and three, two or one r200Vertex pointers (triangle, line, point).
+ */
+typedef void (*r200_tri_func)( r200ContextPtr,
+                                r200Vertex *,
+                                r200Vertex *,
+                                r200Vertex * );
+
+typedef void (*r200_line_func)( r200ContextPtr,
+                                 r200Vertex *,
+                                 r200Vertex * );
+
+typedef void (*r200_point_func)( r200ContextPtr,
+                                  r200Vertex * );
+
+
+/* Color buffer state.
+ * NOTE(review): presumably `clear' is the packed clear value and
+ * drawOffset/drawPitch locate the buffer being rendered to - confirm
+ * against the users in r200_state.c.
+ */
+struct r200_colorbuffer_state {
+   GLuint clear;
+   GLint drawOffset, drawPitch;
+};
+
+
+/* Depth buffer state.
+ * NOTE(review): `scale' looks like a depth-range scale factor - confirm
+ * where it is set.
+ */
+struct r200_depthbuffer_state {
+   GLfloat scale;
+};
+
+/* Pixel-path state.
+ * NOTE(review): presumably the offset/pitch of the buffer used for reads;
+ * verify against the pixel code.
+ */
+struct r200_pixel_state {
+   GLint readOffset, readPitch;
+};
+
+/* Scissor state: one rectangle plus an enable flag, and a separately
+ * allocated cliprect array.  pClipRects is released with FREE() in
+ * r200DestroyContext().
+ */
+struct r200_scissor_state {
+   XF86DRIClipRectRec rect;
+   GLboolean enabled;
+
+   GLuint numClipRects;                        /* Cliprects active */
+   GLuint numAllocedClipRects;         /* Cliprects available */
+   XF86DRIClipRectPtr pClipRects;
+};
+
+/* Stencil buffer state.
+ * NOTE(review): `hwBuffer' presumably records whether a hardware stencil
+ * buffer exists - confirm where it is set.
+ */
+struct r200_stencilbuffer_state {
+   GLboolean hwBuffer;
+   GLuint clear;                       /* rb3d_stencilrefmask value */
+};
+
+/* Stipple state: 32 mask words.
+ * NOTE(review): presumably one word per row of a 32x32 polygon stipple
+ * pattern - confirm.
+ */
+struct r200_stipple_state {
+   GLuint mask[32];
+};
+
+
+
+#define TEX_0   0x1
+#define TEX_1   0x2
+#define TEX_ALL 0x3
+
+typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr;
+
+/* Texture object in locally shared texture space.
+ *
+ * `base' is the first member; sizeof(r200TexObj) is the object size given
+ * to driCreateTextureHeap() in r200CreateContext().
+ */
+struct r200_tex_obj {
+   driTextureObject   base;
+
+   GLuint bufAddr;                     /* Offset to start of locally
+                                          shared texture block */
+
+   GLuint dirty_state;                 /* Flags (1 per texunit) for
+                                          whether or not this texobj
+                                          has dirty hardware state
+                                          (pp_*) that needs to be
+                                          brought into the
+                                          texunit. */
+
+   drmRadeonTexImage image[6][RADEON_MAX_TEXTURE_LEVELS];
+                                       /* Six, for the cube faces */
+
+   GLuint pp_txfilter;                 /* hardware register values */
+   GLuint pp_txformat;
+   GLuint pp_txformat_x;
+   GLuint pp_txoffset;                 /* Image location in texmem.
+                                          All cube faces follow. */
+   GLuint pp_txsize;                   /* npot only */
+   GLuint pp_txpitch;                  /* npot only */
+   GLuint pp_border_color;
+   GLuint pp_cubic_faces;              /* cube face 1,2,3,4 log2 sizes */
+
+   GLboolean  border_fallback;         /* NOTE(review): presumably set when
+                                          the border mode needs a software
+                                          fallback - confirm */
+};
+
+
+/* State tracked for one texture unit: a texture object pointer plus two
+ * GLenum values.
+ * NOTE(review): presumably the bound object, its base format and the
+ * texture environment mode - confirm.
+ */
+struct r200_texture_env_state {
+   r200TexObjPtr texobj;
+   GLenum format;
+   GLenum envMode;
+};
+
+/* Size of the per-unit state array below.  Note that r200CreateContext()
+ * sets ctx->Const.MaxTextureUnits to 2, which is smaller than this value.
+ */
+#define R200_MAX_TEXTURE_UNITS 3
+
+/* Texture state for all units. */
+struct r200_texture_state {
+   struct r200_texture_env_state unit[R200_MAX_TEXTURE_UNITS];
+};
+
+
+/* One unit of hardware state, stored as command-buffer commands and
+ * linked into a doubly-linked list through next/prev.
+ */
+struct r200_state_atom {
+   struct r200_state_atom *next, *prev;
+   const char *name;                    /* for debug */
+   int cmd_size;                        /* size in bytes */
+   GLuint idx;                          /* passed to check() - TODO confirm */
+   int *cmd;                            /* one or more cmd's */
+   int *lastcmd;                        /* one or more cmd's */
+   GLboolean (*check)( GLcontext *, int );    /* is this state active? */
+};
+   
+
+
+/* Trying to keep these relatively short as the variables are becoming
+ * extravagantly long.  Drop the driver name prefix off the front of
+ * everything - I think we know which driver we're in by now, and keep the
+ * prefix to 3 letters unless absolutely impossible.
+ */
+
+#define CTX_CMD_0             0
+#define CTX_PP_MISC           1
+#define CTX_PP_FOG_COLOR      2
+#define CTX_RE_SOLID_COLOR    3
+#define CTX_RB3D_BLENDCNTL    4
+#define CTX_RB3D_DEPTHOFFSET  5
+#define CTX_RB3D_DEPTHPITCH   6
+#define CTX_RB3D_ZSTENCILCNTL 7
+#define CTX_CMD_1             8
+#define CTX_PP_CNTL           9
+#define CTX_RB3D_CNTL         10
+#define CTX_RB3D_COLOROFFSET  11
+#define CTX_CMD_2             12 /* why */
+#define CTX_RB3D_COLORPITCH   13 /* why */
+#define CTX_STATE_SIZE        14
+
+#define SET_CMD_0               0
+#define SET_SE_CNTL             1
+#define SET_RE_CNTL             2 /* replace se_coord_fmt */
+#define SET_STATE_SIZE          3
+
+#define VTE_CMD_0               0
+#define VTE_SE_VTE_CNTL         1
+#define VTE_STATE_SIZE          2
+
+#define LIN_CMD_0               0
+#define LIN_RE_LINE_PATTERN     1
+#define LIN_RE_LINE_STATE       2
+#define LIN_CMD_1               3
+#define LIN_SE_LINE_WIDTH       4
+#define LIN_STATE_SIZE          5
+
+#define MSK_CMD_0               0
+#define MSK_RB3D_STENCILREFMASK 1
+#define MSK_RB3D_ROPCNTL        2
+#define MSK_RB3D_PLANEMASK      3
+#define MSK_STATE_SIZE          4
+
+#define VPT_CMD_0           0
+#define VPT_SE_VPORT_XSCALE          1
+#define VPT_SE_VPORT_XOFFSET         2
+#define VPT_SE_VPORT_YSCALE          3
+#define VPT_SE_VPORT_YOFFSET         4
+#define VPT_SE_VPORT_ZSCALE          5
+#define VPT_SE_VPORT_ZOFFSET         6
+#define VPT_STATE_SIZE      7
+
+#define ZBS_CMD_0               0
+#define ZBS_SE_ZBIAS_FACTOR     1
+#define ZBS_SE_ZBIAS_CONSTANT   2
+#define ZBS_STATE_SIZE          3
+
+#define MSC_CMD_0               0
+#define MSC_RE_MISC             1
+#define MSC_STATE_SIZE          2
+
+#define TAM_CMD_0               0
+#define TAM_DEBUG3              1
+#define TAM_STATE_SIZE          2
+
+#define TEX_CMD_0                   0
+#define TEX_PP_TXFILTER             1  /*2c00*/
+#define TEX_PP_TXFORMAT             2  /*2c04*/
+#define TEX_PP_TXFORMAT_X           3  /*2c08*/
+#define TEX_PP_TXSIZE               4  /*2c0c*/
+#define TEX_PP_TXPITCH              5  /*2c10*/
+#define TEX_PP_BORDER_COLOR         6  /*2c14*/
+#define TEX_CMD_1                   7
+#define TEX_PP_TXOFFSET             8  /*2d00 */
+#define TEX_STATE_SIZE              9
+
+#define CUBE_CMD_0                  0  /* 1 register follows */
+#define CUBE_PP_CUBIC_FACES         1  /* 0x2c18 */
+#define CUBE_CMD_1                  2  /* 5 registers follow */
+#define CUBE_PP_CUBIC_OFFSET_F1     3  /* 0x2d04 */
+#define CUBE_PP_CUBIC_OFFSET_F2     4  /* 0x2d08 */
+#define CUBE_PP_CUBIC_OFFSET_F3     5  /* 0x2d0c */
+#define CUBE_PP_CUBIC_OFFSET_F4     6  /* 0x2d10 */
+#define CUBE_PP_CUBIC_OFFSET_F5     7  /* 0x2d14 */
+#define CUBE_STATE_SIZE             8
+
+#define PIX_CMD_0                   0
+#define PIX_PP_TXCBLEND             1
+#define PIX_PP_TXCBLEND2            2
+#define PIX_PP_TXABLEND             3
+#define PIX_PP_TXABLEND2            4
+#define PIX_STATE_SIZE              5
+
+#define TF_CMD_0                    0
+#define TF_TFACTOR_0                1
+#define TF_TFACTOR_1                2
+#define TF_TFACTOR_2                3
+#define TF_TFACTOR_3                4
+#define TF_TFACTOR_4                5
+#define TF_TFACTOR_5                6
+#define TF_STATE_SIZE               7
+
+#define TCL_CMD_0                 0
+#define TCL_LIGHT_MODEL_CTL_0     1
+#define TCL_LIGHT_MODEL_CTL_1     2
+#define TCL_PER_LIGHT_CTL_0       3
+#define TCL_PER_LIGHT_CTL_1       4
+#define TCL_PER_LIGHT_CTL_2       5
+#define TCL_PER_LIGHT_CTL_3       6
+#define TCL_CMD_1                 7
+#define TCL_UCP_VERT_BLEND_CTL    8
+#define TCL_STATE_SIZE            9
+
+#define MSL_CMD_0                     0
+#define MSL_MATRIX_SELECT_0           1
+#define MSL_MATRIX_SELECT_1           2
+#define MSL_MATRIX_SELECT_2           3
+#define MSL_MATRIX_SELECT_3           4
+#define MSL_MATRIX_SELECT_4           5
+#define MSL_STATE_SIZE                6
+
+#define TCG_CMD_0                 0
+#define TCG_TEX_PROC_CTL_2            1
+#define TCG_TEX_PROC_CTL_3            2
+#define TCG_TEX_PROC_CTL_0            3
+#define TCG_TEX_PROC_CTL_1            4
+#define TCG_TEX_CYL_WRAP_CTL      5
+#define TCG_STATE_SIZE            6
+
+#define MTL_CMD_0            0 
+#define MTL_EMMISSIVE_RED    1 
+#define MTL_EMMISSIVE_GREEN  2 
+#define MTL_EMMISSIVE_BLUE   3 
+#define MTL_EMMISSIVE_ALPHA  4 
+#define MTL_AMBIENT_RED      5
+#define MTL_AMBIENT_GREEN    6
+#define MTL_AMBIENT_BLUE     7
+#define MTL_AMBIENT_ALPHA    8
+#define MTL_DIFFUSE_RED      9
+#define MTL_DIFFUSE_GREEN    10
+#define MTL_DIFFUSE_BLUE     11
+#define MTL_DIFFUSE_ALPHA    12
+#define MTL_SPECULAR_RED     13
+#define MTL_SPECULAR_GREEN   14
+#define MTL_SPECULAR_BLUE    15
+#define MTL_SPECULAR_ALPHA   16
+#define MTL_CMD_1            17
+#define MTL_SHININESS        18
+#define MTL_STATE_SIZE       19
+
+#define VAP_CMD_0                   0
+#define VAP_SE_VAP_CNTL             1
+#define VAP_STATE_SIZE              2
+
+/* Replaces a lot of packet info from radeon
+ */
+#define VTX_CMD_0                   0
+#define VTX_VTXFMT_0            1
+#define VTX_VTXFMT_1            2
+#define VTX_TCL_OUTPUT_VTXFMT_0 3
+#define VTX_TCL_OUTPUT_VTXFMT_1 4
+#define VTX_CMD_1               5
+#define VTX_TCL_OUTPUT_COMPSEL  6
+#define VTX_CMD_2               7
+#define VTX_STATE_CNTL          8
+#define VTX_STATE_SIZE          9
+
+
+#define VTX_COLOR(v,n)   (((v)>>(R200_VTX_COLOR_0_SHIFT+(n)*2))&\
+                         R200_VTX_COLOR_MASK)
+
+#define MAT_CMD_0              0
+#define MAT_ELT_0              1
+#define MAT_STATE_SIZE         17
+
+#define GRD_CMD_0                  0
+#define GRD_VERT_GUARD_CLIP_ADJ    1
+#define GRD_VERT_GUARD_DISCARD_ADJ 2
+#define GRD_HORZ_GUARD_CLIP_ADJ    3
+#define GRD_HORZ_GUARD_DISCARD_ADJ 4
+#define GRD_STATE_SIZE             5
+
+/* position changes frequently when lighting in modelpos - separate
+ * out to new state item?  
+ */
+#define LIT_CMD_0                  0
+#define LIT_AMBIENT_RED            1
+#define LIT_AMBIENT_GREEN          2
+#define LIT_AMBIENT_BLUE           3
+#define LIT_AMBIENT_ALPHA          4
+#define LIT_DIFFUSE_RED            5
+#define LIT_DIFFUSE_GREEN          6
+#define LIT_DIFFUSE_BLUE           7
+#define LIT_DIFFUSE_ALPHA          8
+#define LIT_SPECULAR_RED           9
+#define LIT_SPECULAR_GREEN         10
+#define LIT_SPECULAR_BLUE          11
+#define LIT_SPECULAR_ALPHA         12
+#define LIT_POSITION_X             13
+#define LIT_POSITION_Y             14
+#define LIT_POSITION_Z             15
+#define LIT_POSITION_W             16
+#define LIT_DIRECTION_X            17
+#define LIT_DIRECTION_Y            18
+#define LIT_DIRECTION_Z            19
+#define LIT_DIRECTION_W            20
+#define LIT_ATTEN_CONST            21
+#define LIT_ATTEN_LINEAR           22
+#define LIT_ATTEN_QUADRATIC        23
+#define LIT_ATTEN_XXX              24
+#define LIT_CMD_1                  25
+#define LIT_SPOT_DCD               26
+#define LIT_SPOT_DCM               27
+#define LIT_SPOT_EXPONENT          28
+#define LIT_SPOT_CUTOFF            29
+#define LIT_SPECULAR_THRESH        30
+#define LIT_RANGE_CUTOFF           31 /* ? */
+#define LIT_RANGE_ATTEN            32 /* ? */
+#define LIT_STATE_SIZE             33
+
+/* Fog
+ */
+#define FOG_CMD_0      0
+#define FOG_R          1
+#define FOG_C          2
+#define FOG_D          3
+#define FOG_PAD        4
+#define FOG_STATE_SIZE 5
+
+/* UCP - slot layout of one hw.ucp[] atom (six exist): one command word
+ * followed by four components X, Y, Z, W.
+ * NOTE(review): UCP presumably stands for user clip plane - confirm.
+ */
+#define UCP_CMD_0      0
+#define UCP_X          1
+#define UCP_Y          2
+#define UCP_Z          3
+#define UCP_W          4
+#define UCP_STATE_SIZE 5
+
+/* GLT - Global ambient.  Slot layout of hw.glt: one command word followed
+ * by the red, green, blue and alpha components.
+ */
+#define GLT_CMD_0      0
+#define GLT_RED        1
+#define GLT_GREEN      2
+#define GLT_BLUE       3
+#define GLT_ALPHA      4
+#define GLT_STATE_SIZE 5
+
+/* EYE - slot layout of the eye position atom (hw.eye): one command word,
+ * the X, Y, Z components and a rescale factor.
+ */
+#define EYE_CMD_0          0
+#define EYE_X              1
+#define EYE_Y              2
+#define EYE_Z              3
+#define EYE_RESCALE_FACTOR 4
+#define EYE_STATE_SIZE     5
+
+/* CST - constant state.  Slot layout of hw.cst: seven command words
+ * (CST_CMD_0 .. CST_CMD_6), each followed by the one or more register
+ * slots listed directly after it.
+ */
+#define CST_CMD_0                             0
+#define CST_PP_CNTL_X                         1
+#define CST_CMD_1                             2
+#define CST_RB3D_DEPTHXY_OFFSET               3
+#define CST_CMD_2                             4
+#define CST_RE_AUX_SCISSOR_CNTL               5
+#define CST_CMD_3                             6
+#define CST_RE_SCISSOR_TL_0                   7
+#define CST_RE_SCISSOR_BR_0                   8
+#define CST_CMD_4                             9
+#define CST_SE_VAP_CNTL_STATUS                10
+#define CST_CMD_5                             11
+#define CST_RE_POINTSIZE                      12
+#define CST_CMD_6                             13
+#define CST_SE_TCL_INPUT_VTX_0                14
+#define CST_SE_TCL_INPUT_VTX_1                15
+#define CST_SE_TCL_INPUT_VTX_2                16
+#define CST_SE_TCL_INPUT_VTX_3                17
+#define CST_STATE_SIZE                        18
+
+
+
+
+/* All hardware state atoms owned by a context.  `dirty` and `clean` are
+ * list heads; every other member is one atom (struct r200_state_atom is
+ * declared outside this view).  Code elsewhere in the driver writes an
+ * atom through its cmd[] array using the *_CMD_0 / *_STATE_SIZE style
+ * index macros, e.g. hw.ctx.cmd[CTX_RB3D_COLOROFFSET].
+ */
+struct r200_hw_state {
+   /* All state should be on one of these lists:
+    */
+   struct r200_state_atom dirty; /* dirty list head placeholder */
+   struct r200_state_atom clean; /* clean list head placeholder */
+
+   /* Hardware state, stored as cmdbuf commands:  
+    *   -- Need to doublebuffer for
+    *           - reviving state after loss of context
+    *           - eliding noop statechange loops? (except line stipple count)
+    */
+   struct r200_state_atom ctx;
+   struct r200_state_atom set;
+   struct r200_state_atom vte;
+   struct r200_state_atom lin;
+   struct r200_state_atom msk;
+   struct r200_state_atom vpt;
+   struct r200_state_atom vap;
+   struct r200_state_atom vtx;
+   struct r200_state_atom tcl;
+   struct r200_state_atom msl;
+   struct r200_state_atom tcg;
+   struct r200_state_atom msc;
+   struct r200_state_atom cst;
+   struct r200_state_atom tam;
+   struct r200_state_atom tf;
+   struct r200_state_atom tex[2];
+   struct r200_state_atom cube[2];
+   struct r200_state_atom zbs;
+   struct r200_state_atom mtl[2]; 
+   struct r200_state_atom mat[5]; 
+   struct r200_state_atom lit[8]; /* includes vec, scl commands */
+   struct r200_state_atom ucp[6];
+   struct r200_state_atom pix[6]; /* pixshader stages */
+   struct r200_state_atom eye; /* eye pos */
+   struct r200_state_atom grd; /* guard band clipping */
+   struct r200_state_atom fog; 
+   struct r200_state_atom glt; 
+};
+
+/* Driver-side (non-hardware) state, grouped per functional area.  The
+ * member structs are declared outside this view; r200_ioctl.c reads e.g.
+ * scissor.enabled / scissor.pClipRects, color.drawOffset / color.clear
+ * and stencil.hwBuffer / stencil.clear from here.
+ */
+struct r200_state {
+   /* Derived state for internal purposes:
+    */
+   struct r200_colorbuffer_state color;
+   struct r200_depthbuffer_state depth;
+   struct r200_pixel_state pixel;
+   struct r200_scissor_state scissor;
+   struct r200_stencilbuffer_state stencil;
+   struct r200_stipple_state stipple;
+   struct r200_texture_state texture;
+};
+
+/* Need refcounting on dma buffers:
+ *
+ * One of these wraps each DRM buffer handed out by drmDMA().  It is
+ * allocated with refcount 1 in r200RefillCurrentDmaRegion(), gains a
+ * reference for every region carved out by r200AllocDmaRegion(), and is
+ * discarded and freed by r200ReleaseDmaRegion() when the count reaches 0.
+ */
+struct r200_dma_buffer {
+   int refcount;               /* the number of retained regions in buf */
+   drmBufPtr buf;
+};
+
+/* Evaluates to agp_buffer_offset + (rvb->address - dma.buf0_address) +
+ * rvb->start for the region pointer `rvb`.
+ * NOTE: expands a reference to a variable named `rmesa`, which must be in
+ * scope wherever the macro is used.
+ */
+#define GET_START(rvb) (rmesa->r200Screen->agp_buffer_offset +         \
+                       (rvb)->address - rmesa->dma.buf0_address +      \
+                       (rvb)->start)
+
+/* A retained region, eg vertices for indexed vertices.
+ *
+ * Describes a sub-range of a refcounted r200_dma_buffer.  A NULL `buf`
+ * means the region is unused (see r200ReleaseDmaRegion()).
+ * NOTE(review): the aos_* members are presumably array-of-structures emit
+ * parameters used by the maos code - confirm.
+ */
+struct r200_dma_region {
+   struct r200_dma_buffer *buf;
+   char *address;              /* == buf->buf->address */
+   int start, end, ptr;                /* offsets from start of buf */
+   int aos_start;
+   int aos_stride;
+   int aos_size;
+};
+
+
+/* Per-context DMA allocation state. */
+struct r200_dma {
+   /* Active dma region.  Allocations for vertices and retained
+    * regions come from here.  Also used for emitting random vertices,
+    * these may be flushed by calling the `flush` hook below.
+    */
+   struct r200_dma_region current;
+   
+   /* Optional hook; when non-NULL it is called before the current region
+    * is refilled, released or allocated from.
+    */
+   void (*flush)( r200ContextPtr );
+
+   char *buf0_address;         /* start of buf[0], for index calcs */
+   GLuint nr_released_bufs;    /* flush after so many buffers released */
+};
+
+/* Copies of DRI/DRM handles kept in the context for quick access
+ * ("Mirrors of some DRI state" in struct r200_context).  `fd` is the file
+ * descriptor passed to the drmCommand*() and drmDMA() calls, `hwContext`
+ * is used as the DMA request context.
+ */
+struct r200_dri_mirror {
+   __DRIcontextPrivate *context;       /* DRI context */
+   __DRIscreenPrivate  *screen;        /* DRI screen */
+   __DRIdrawablePrivate        *drawable;      /* DRI drawable bound to this ctx */
+
+   drmContext hwContext;
+   drmLock *hwLock;
+   int fd;
+   int drmMinor;
+};
+
+
+/* Size in bytes of the per-context command buffer below. */
+#define R200_CMD_BUF_SZ  (8*1024) 
+
+/* Client-side command buffer.  r200FlushCmdBufLocked() submits the first
+ * `cmd_used` bytes of `cmd_buf` to the kernel and then resets statenr,
+ * primnr and cmd_used to 0.
+ */
+struct r200_store {
+   GLuint statenr;
+   GLuint primnr;
+   char cmd_buf[R200_CMD_BUF_SZ];
+   int cmd_used;   
+   int elts_start;
+};
+
+
+/* r200_tcl.c
+ *
+ * State for the hardware TCL path: the current vertex format and
+ * primitive, the list of active array components, and one DMA region per
+ * vertex attribute array.
+ */
+struct r200_tcl_info {
+   GLuint vertex_format;
+   GLint last_offset;
+   GLuint hw_primitive;
+
+   /* Pointers to up to eight of the regions below; nr_aos_components
+    * gives how many entries are in use.
+    */
+   struct r200_dma_region *aos_components[8];
+   GLuint nr_aos_components;
+
+   GLuint *Elts;
+
+   struct r200_dma_region indexed_verts;
+   struct r200_dma_region obj;
+   struct r200_dma_region rgba;
+   struct r200_dma_region spec;
+   struct r200_dma_region fog;
+   struct r200_dma_region tex[R200_MAX_TEXTURE_UNITS];
+   struct r200_dma_region norm;
+};
+
+
+/* r200_swtcl.c
+ *
+ * State for the software TCL path: vertex setup/render selection, the
+ * vertex buffer description and the fallback rasterization hooks.
+ */
+struct r200_swtcl_info {
+   GLuint SetupIndex;
+   GLuint SetupNewInputs;
+   GLuint RenderIndex;
+   GLuint vertex_size;
+   GLuint vertex_stride_shift;
+   GLuint vertex_format;
+   char *verts;
+
+   /* Fallback rasterization functions
+    */
+   r200_point_func draw_point;
+   r200_line_func draw_line;
+   r200_tri_func draw_tri;
+
+   GLuint hw_primitive;
+   GLenum render_primitive;
+   GLuint numverts;
+
+   struct r200_dma_region indexed_verts;
+};
+
+
+/* Vertex bookkeeping for the ioctl layer.
+ * NOTE(review): neither field is referenced in the visible part of
+ * r200_ioctl.c; units and usage need confirming against the users.
+ */
+struct r200_ioctl {
+   GLuint vertex_offset;
+   GLuint vertex_size;
+};
+
+
+
+/* Capacity of r200_vbinfo.primlist[]. */
+#define R200_MAX_PRIMS 64
+
+
+/* Want to keep a cache of these around.  Each is parameterized by
+ * only a single value which has only a small range.  Only expect a
+ * few, so just rescan the list each time?
+ *
+ * One node of a doubly linked list (next/prev) holding a two-int lookup
+ * key and a pointer to the associated code.
+ * NOTE(review): `code` presumably points at run-time generated machine
+ * code produced by the dfn_generators hooks - confirm.
+ */
+struct dynfn {
+   struct dynfn *next, *prev;
+   int key[2];
+   char *code;
+};
+
+/* One struct dynfn per supported immediate-mode entry point.  The member
+ * names match those of struct dfn_generators one-to-one.
+ * NOTE(review): each member presumably serves as the head of that entry
+ * point's cache list - confirm.
+ */
+struct dfn_lists {
+   struct dynfn Vertex2f;
+   struct dynfn Vertex2fv;
+   struct dynfn Vertex3f;
+   struct dynfn Vertex3fv;
+   struct dynfn Color4ub;
+   struct dynfn Color4ubv;
+   struct dynfn Color3ub;
+   struct dynfn Color3ubv;
+   struct dynfn Color4f;
+   struct dynfn Color4fv;
+   struct dynfn Color3f;
+   struct dynfn Color3fv;
+   struct dynfn SecondaryColor3ubEXT;
+   struct dynfn SecondaryColor3ubvEXT;
+   struct dynfn SecondaryColor3fEXT;
+   struct dynfn SecondaryColor3fvEXT;
+   struct dynfn Normal3f;
+   struct dynfn Normal3fv;
+   struct dynfn TexCoord2f;
+   struct dynfn TexCoord2fv;
+   struct dynfn TexCoord1f;
+   struct dynfn TexCoord1fv;
+   struct dynfn MultiTexCoord2fARB;
+   struct dynfn MultiTexCoord2fvARB;
+   struct dynfn MultiTexCoord1fARB;
+   struct dynfn MultiTexCoord1fvARB;
+};
+
+/* Table of generator hooks, one per entry point in struct dfn_lists.
+ * Each hook takes the GL context and a const int pointer and returns a
+ * struct dynfn pointer.
+ * NOTE(review): the int pointer is presumably the dynfn key[] to generate
+ * for - confirm.
+ */
+struct dfn_generators {
+   struct dynfn *(*Vertex2f)( GLcontext *, const int * );
+   struct dynfn *(*Vertex2fv)( GLcontext *, const int * );
+   struct dynfn *(*Vertex3f)( GLcontext *, const int * );
+   struct dynfn *(*Vertex3fv)( GLcontext *, const int * );
+   struct dynfn *(*Color4ub)( GLcontext *, const int * );
+   struct dynfn *(*Color4ubv)( GLcontext *, const int * );
+   struct dynfn *(*Color3ub)( GLcontext *, const int * );
+   struct dynfn *(*Color3ubv)( GLcontext *, const int * );
+   struct dynfn *(*Color4f)( GLcontext *, const int * );
+   struct dynfn *(*Color4fv)( GLcontext *, const int * );
+   struct dynfn *(*Color3f)( GLcontext *, const int * );
+   struct dynfn *(*Color3fv)( GLcontext *, const int * );
+   struct dynfn *(*SecondaryColor3ubEXT)( GLcontext *, const int * );
+   struct dynfn *(*SecondaryColor3ubvEXT)( GLcontext *, const int * );
+   struct dynfn *(*SecondaryColor3fEXT)( GLcontext *, const int * );
+   struct dynfn *(*SecondaryColor3fvEXT)( GLcontext *, const int * );
+   struct dynfn *(*Normal3f)( GLcontext *, const int * );
+   struct dynfn *(*Normal3fv)( GLcontext *, const int * );
+   struct dynfn *(*TexCoord2f)( GLcontext *, const int * );
+   struct dynfn *(*TexCoord2fv)( GLcontext *, const int * );
+   struct dynfn *(*TexCoord1f)( GLcontext *, const int * );
+   struct dynfn *(*TexCoord1fv)( GLcontext *, const int * );
+   struct dynfn *(*MultiTexCoord2fARB)( GLcontext *, const int * );
+   struct dynfn *(*MultiTexCoord2fvARB)( GLcontext *, const int * );
+   struct dynfn *(*MultiTexCoord1fARB)( GLcontext *, const int * );
+   struct dynfn *(*MultiTexCoord1fvARB)( GLcontext *, const int * );
+};
+
+
+
+/* One entry of r200_vbinfo.primlist[]: a start/end pair plus a primitive
+ * value.
+ * NOTE(review): start/end are presumably vertex indices and `prim` a
+ * hardware or GL primitive code - confirm against r200_vtxfmt.c.
+ */
+struct r200_prim {
+   GLuint start;
+   GLuint end;
+   GLuint prim;
+};
+
+/* State for the vtxfmt immediate-mode path (r200_vtxfmt.c, per the
+ * comment on the `vb` member of struct r200_context): the current-vertex
+ * scratch storage, pointers into it for each attribute, the pending
+ * primitive list and the dynamic code generation caches.
+ */
+struct r200_vbinfo {
+   GLint counter, initial_counter;
+   GLint *dmaptr;
+   void (*notify)( void );
+   GLint vertex_size;
+
+   /* A maximum total of 15 elements per vertex:  3 floats for position, 3
+    * floats for normal, 4 floats for color, 4 bytes for secondary color,
+    * 2 floats for each texture unit (4 floats total).
+    *
+    * As soon as the 3rd TMU is supported or cube maps (or 3D textures) are
+    * supported, this value will grow.
+    *
+    * The position data is never actually stored here, so 3 elements could be
+    * trimmed out of the buffer.
+    */
+   union { float f; int i; r200_color_t color; } vertex[15];
+
+   GLfloat *normalptr;
+   GLfloat *floatcolorptr;
+   r200_color_t *colorptr;
+   GLfloat *floatspecptr;
+   r200_color_t *specptr;
+   GLfloat *texcoordptr[2];
+
+
+   GLenum *prim;               /* &ctx->Driver.CurrentExecPrimitive */
+   GLuint primflags;
+   GLboolean enabled;          /* *_NO_VTXFMT / *_NO_TCL env vars */
+   GLboolean installed;
+   GLboolean fell_back;
+   GLboolean recheck;
+   GLint nrverts;
+   GLuint vtxfmt_0, vtxfmt_1;
+
+   GLuint installed_vertex_format;
+   GLuint installed_color_3f_sz;
+
+   /* Pending primitives; nrprims entries of primlist[] are in use. */
+   struct r200_prim primlist[R200_MAX_PRIMS];
+   int nrprims;
+
+   struct dfn_lists dfn_cache;
+   struct dfn_generators codegen;
+   GLvertexformat vtxfmt;
+};
+
+
+
+
+/* The driver-private context.  Reached from a Mesa GLcontext through
+ * R200_CONTEXT() and from a DRI context through its driverPrivate
+ * pointer (see r200CopyBuffer()).
+ */
+struct r200_context {
+   GLcontext *glCtx;                   /* Mesa context */
+
+   /* Driver and hardware state management
+    */
+   struct r200_hw_state hw;
+   struct r200_state state;
+
+   /* Texture object bookkeeping
+    */
+   unsigned              nr_heaps;
+   driTexHeap          * texture_heaps[ R200_NR_TEX_HEAPS ];
+   driTextureObject      swapped;
+
+
+   /* Rasterization and vertex state:
+    */
+   GLuint TclFallback;
+   GLuint Fallback;
+   GLuint NewGLState;
+
+   
+   /* Temporaries for translating away float colors:
+    */
+   struct gl_client_array UbyteColor;
+   struct gl_client_array UbyteSecondaryColor;
+
+   /* Vertex buffers
+    */
+   struct r200_ioctl ioctl;
+   struct r200_dma dma;
+   struct r200_store store;
+
+   /* Page flipping
+    */
+   GLuint doPageFlip;
+
+   /* Busy waiting
+    *
+    * do_usleeps: when set, wait loops drop the hardware lock and call
+    *             DO_USLEEP between retries.
+    * do_irqs / irqsEmitted / iw: used by r200WaitForFrameCompletion() to
+    *             wait on an emitted IRQ instead of polling.
+    */
+   GLuint do_usleeps;
+   GLuint do_irqs;
+   GLuint irqsEmitted;
+   drmRadeonIrqWait iw;
+
+   /* Clientdata textures;
+    */
+   GLuint prefer_agp_client_texturing;
+
+   /* Drawable, cliprect and scissor information
+    */
+   GLuint numClipRects;                        /* Cliprects for the draw buffer */
+   XF86DRIClipRectPtr pClipRects;
+   unsigned int lastStamp;
+   GLboolean lost_context;     /* set to 1 after every cmdbuf flush and swap */
+   r200ScreenPtr r200Screen;   /* Screen private DRI data */
+   RADEONSAREAPrivPtr sarea;           /* Private SAREA data */
+
+   /* TCL stuff
+    */
+   GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS];
+   GLboolean recheck_texgen[R200_MAX_TEXTURE_UNITS];
+   GLboolean TexGenNeedNormals[R200_MAX_TEXTURE_UNITS];
+   GLuint TexMatEnabled;
+   GLuint TexMatCompSel;
+   GLuint TexGenEnabled;
+   GLuint TexGenInputs;
+   GLuint TexGenCompSel;
+   GLmatrix tmpmat;
+
+   /* VBI / buffer swap
+    *
+    * swap_count and swap_ust are updated on every swap or flip;
+    * swap_missed_count and swap_missed_ust only when driWaitForVBlank()
+    * reports a missed target.  get_ust is the callback used to obtain
+    * the UST values.
+    */
+   GLuint vbl_seq;
+   GLuint vblank_flags;
+
+   uint64_t swap_ust;
+   uint64_t swap_missed_ust;
+
+   GLuint swap_count;
+   GLuint swap_missed_count;
+
+   PFNGLXGETUSTPROC get_ust;
+
+   /* r200_tcl.c
+    */
+   struct r200_tcl_info tcl;
+
+   /* r200_swtcl.c
+    */
+   struct r200_swtcl_info swtcl;
+
+   /* r200_vtxfmt.c
+    */
+   struct r200_vbinfo vb;
+
+   /* Mirrors of some DRI state
+    */
+   struct r200_dri_mirror dri;
+};
+
+/* Fetch the driver-private r200 context from a Mesa GLcontext pointer.
+ * The argument is parenthesized so that expressions other than a plain
+ * identifier (e.g. R200_CONTEXT(&foo) or R200_CONTEXT(p + 1)) expand
+ * with the intended precedence.
+ */
+#define R200_CONTEXT(ctx)              ((r200ContextPtr)((ctx)->DriverCtx))
+
+
+/* Pack four 8-bit colour components into a single pixel value.
+ *
+ *   cpp == 2  -> PACK_COLOR_565( r, g, b )      (alpha is ignored)
+ *   cpp == 4  -> PACK_COLOR_8888( a, r, g, b )
+ *   otherwise -> 0
+ */
+static __inline GLuint r200PackColor( GLuint cpp,
+                                       GLubyte r, GLubyte g,
+                                       GLubyte b, GLubyte a )
+{
+   if ( cpp == 2 )
+      return PACK_COLOR_565( r, g, b );
+
+   if ( cpp == 4 )
+      return PACK_COLOR_8888( a, r, g, b );
+
+   return 0;
+}
+
+/* Compile-time switch, disabled (0) here.
+ * NOTE(review): not referenced in the visible part of the driver;
+ * presumably selects an older command packet format - confirm.
+ */
+#define R200_OLD_PACKETS 0
+
+
+/* Context lifecycle and buffer swap entry points.  The definitions are
+ * outside this view.
+ */
+extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv );
+extern GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+                                   __DRIcontextPrivate *driContextPriv,
+                                   void *sharedContextPrivate);
+extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv );
+extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
+                                 __DRIdrawablePrivate *driDrawPriv,
+                                 __DRIdrawablePrivate *driReadPriv );
+extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv );
+
+/* ================================================================
+ * Debugging:
+ *
+ * With DO_DEBUG non-zero, R200_DEBUG is a global int defined elsewhere;
+ * otherwise it is the constant 0, so every `R200_DEBUG & DEBUG_xxx` test
+ * is constant false.  The DEBUG_* values are single-bit flags meant to be
+ * tested against R200_DEBUG with a bitwise AND.
+ */
+#define DO_DEBUG               1
+
+#if DO_DEBUG
+extern int R200_DEBUG;
+#else
+#define R200_DEBUG             0
+#endif
+
+#define DEBUG_TEXTURE  0x001
+#define DEBUG_STATE    0x002
+#define DEBUG_IOCTL    0x004
+#define DEBUG_PRIMS    0x008
+#define DEBUG_VERTS    0x010
+#define DEBUG_FALLBACKS        0x020
+#define DEBUG_VFMT     0x040
+#define DEBUG_CODEGEN  0x080
+#define DEBUG_VERBOSE  0x100
+#define DEBUG_DRI       0x200
+#define DEBUG_DMA       0x400
+#define DEBUG_SANITY    0x800
+#define DEBUG_SYNC      0x1000
+#define DEBUG_PIXEL     0x2000
+#define DEBUG_MEMORY    0x4000
+
+#endif
+#endif /* __R200_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
new file mode 100644 (file)
index 0000000..ea67216
--- /dev/null
@@ -0,0 +1,925 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "macros.h"
+#include "context.h"
+#include "swrast/swrast.h"
+
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_tcl.h"
+#include "r200_sanity.h"
+#include "radeon_reg.h"
+
+#include "vblank.h"
+
+
+/* NOTE(review): neither constant is referenced in the visible part of
+ * this file; presumably retry limits for the idle-wait code - confirm.
+ */
+#define R200_TIMEOUT             512
+#define R200_IDLE_RETRY           16
+
+
+/* Forward declaration; the definition is later in this file. */
+static void r200WaitForIdle( r200ContextPtr rmesa );
+
+
+/* Submit the accumulated command buffer (rmesa->store) to the kernel with
+ * DRM_RADEON_CMDBUF.  r200FlushCmdBuf() calls this with the hardware lock
+ * held.
+ *
+ * `caller` is only used in debug output.
+ *
+ * Returns the result of drmCommandWrite(), or the non-zero result of
+ * r200SanityCmdBuffer() when DEBUG_SANITY is set and the check fails, in
+ * which case nothing is submitted.  In every case the store counters and
+ * dma.nr_released_bufs are reset and lost_context is set to 1 before
+ * returning.
+ */
+int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller )
+{
+   int ret, i;
+   drmRadeonCmdBuffer cmd;
+
+   if (R200_DEBUG & DEBUG_IOCTL) {
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+
+      /* The leading `0 &` makes this condition constant false, so the
+       * dword-by-dword dump below is compiled in but never runs.
+       */
+      if (0 & R200_DEBUG & DEBUG_VERBOSE) 
+        for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
+           fprintf(stderr, "%d: %x\n", i/4, 
+                   *(int *)(&rmesa->store.cmd_buf[i]));
+   }
+
+   if (R200_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
+             rmesa->dma.nr_released_bufs);
+
+
+   /* Optional client-side validation, using the same cliprect selection
+    * as the real submit further down.
+    */
+   if (R200_DEBUG & DEBUG_SANITY) {
+      if (rmesa->state.scissor.enabled) 
+        ret = r200SanityCmdBuffer( rmesa, 
+                                   rmesa->state.scissor.numClipRects,
+                                   rmesa->state.scissor.pClipRects);
+      else
+        ret = r200SanityCmdBuffer( rmesa, 
+                                   rmesa->numClipRects,
+                                   rmesa->pClipRects);
+      if (ret) {
+        fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);    
+        goto out;
+      }
+   }
+
+
+   if (R200_DEBUG & DEBUG_MEMORY) {
+      if (! driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps,
+                                    & rmesa->swapped ) ) {
+        fprintf( stderr, "%s: texture memory is inconsistent - expect "
+                 "mangled textures\n", __FUNCTION__ );
+      }
+   }
+
+
+   cmd.bufsz = rmesa->store.cmd_used;
+   cmd.buf = rmesa->store.cmd_buf;
+
+   /* Scissored rendering uses the scissor state's cliprect list, otherwise
+    * the drawable's cliprects are passed.
+    */
+   if (rmesa->state.scissor.enabled) {
+      cmd.nbox = rmesa->state.scissor.numClipRects;
+      cmd.boxes = (drmClipRect *)rmesa->state.scissor.pClipRects;
+   } else {
+      cmd.nbox = rmesa->numClipRects;
+      cmd.boxes = (drmClipRect *)rmesa->pClipRects;
+   }
+
+   ret = drmCommandWrite( rmesa->dri.fd,
+                         DRM_RADEON_CMDBUF,
+                         &cmd, sizeof(cmd) );
+
+   /* A failure is only reported here; the caller decides what to do with
+    * the returned code.
+    */
+   if (ret)
+      fprintf(stderr, "drmCommandWrite: %d\n", ret);
+
+   if (R200_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
+      r200WaitForIdleLocked( rmesa );
+   }
+
+
+ out:
+   /* Reset the buffer even on failure so the same commands are not
+    * submitted again.
+    */
+   rmesa->store.primnr = 0;
+   rmesa->store.statenr = 0;
+   rmesa->store.cmd_used = 0;
+   rmesa->dma.nr_released_bufs = 0;
+   rmesa->lost_context = 1;    
+   return ret;
+}
+
+
+/* Take the hardware lock, flush the command buffer through
+ * r200FlushCmdBufLocked() and drop the lock again.  A non-zero result is
+ * reported on stderr and terminates the process with that value as the
+ * exit status.
+ *
+ * Note: does not emit any commands to avoid recursion on
+ * r200AllocCmdBuf.
+ */
+void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller )
+{
+   int status;
+
+   LOCK_HARDWARE( rmesa );
+   status = r200FlushCmdBufLocked( rmesa, caller );
+   UNLOCK_HARDWARE( rmesa );
+
+   if (status == 0)
+      return;
+
+   fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", status);
+   exit(status);
+}
+
+
+/* =============================================================
+ * Hardware vertex buffer handling
+ */
+
+
+/* Replace rmesa->dma.current with a freshly granted DMA buffer.
+ *
+ * Steps:
+ *   - run the pending dma.flush hook, if any;
+ *   - release the current region (this may queue a buffer discard);
+ *   - flush the command buffer once more than four buffers have been
+ *     released, so the kernel can recycle them;
+ *   - request one RADEON_BUFFER_SIZE buffer with drmDMA(), retrying until
+ *     it succeeds; between attempts any queued discards are flushed and,
+ *     if do_usleeps is set, the lock is dropped around a short sleep;
+ *   - wrap the granted buffer in a refcounted r200_dma_buffer and make it
+ *     the current region, with start and ptr both 0.
+ *
+ * Exits the process if the bookkeeping struct cannot be allocated; the
+ * original code dereferenced the NULL pointer in that case.
+ */
+void r200RefillCurrentDmaRegion( r200ContextPtr rmesa )
+{
+   struct r200_dma_buffer *dmabuf;
+   int fd = rmesa->dri.fd;
+   int index = 0;
+   int size = 0;
+   drmDMAReq dma;
+   int ret;
+
+   if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+      fprintf(stderr, "%s\n", __FUNCTION__);  
+
+   if (rmesa->dma.flush) {
+      rmesa->dma.flush( rmesa );
+   }
+
+   if (rmesa->dma.current.buf)
+      r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+
+   if (rmesa->dma.nr_released_bufs > 4)
+      r200FlushCmdBuf( rmesa, __FUNCTION__ );
+
+   /* Ask for exactly one buffer; the granted index and size are written
+    * to `index` and `size` through the request_list/request_sizes
+    * pointers.
+    */
+   dma.context = rmesa->dri.hwContext;
+   dma.send_count = 0;
+   dma.send_list = NULL;
+   dma.send_sizes = NULL;
+   dma.flags = 0;
+   dma.request_count = 1;
+   dma.request_size = RADEON_BUFFER_SIZE;
+   dma.request_list = &index;
+   dma.request_sizes = &size;
+   dma.granted_count = 0;
+
+   LOCK_HARDWARE(rmesa);       /* no need to validate */
+
+   while (1) {
+      ret = drmDMA( fd, &dma );
+      if (ret == 0)
+        break;
+   
+      /* No buffer was granted: push out queued discards so buffers can be
+       * returned, then retry.
+       */
+      if (rmesa->dma.nr_released_bufs) {
+        r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+      }
+
+      if (rmesa->do_usleeps) {
+        UNLOCK_HARDWARE( rmesa );
+        DO_USLEEP( 1 );
+        LOCK_HARDWARE( rmesa );
+      }
+   }
+
+   UNLOCK_HARDWARE(rmesa);
+
+   if (R200_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "Allocated buffer %d\n", index);
+
+   dmabuf = CALLOC_STRUCT( r200_dma_buffer );
+   if (!dmabuf) {
+      /* Fail loudly, matching how this file treats other unrecoverable
+       * errors, rather than dereferencing NULL below.
+       */
+      fprintf(stderr, "%s: out of memory\n", __FUNCTION__);
+      exit(1);
+   }
+   dmabuf->buf = &rmesa->r200Screen->buffers->list[index];
+   dmabuf->refcount = 1;
+
+   rmesa->dma.current.buf = dmabuf;
+   rmesa->dma.current.address = dmabuf->buf->address;
+   rmesa->dma.current.end = dmabuf->buf->total;
+   rmesa->dma.current.start = 0;
+   rmesa->dma.current.ptr = 0;
+}
+
+/* Drop `region`'s reference on its DMA buffer and mark the region unused.
+ *
+ * A region whose buf is NULL is left untouched.  When the last reference
+ * goes away, a RADEON_CMD_DMA_DISCARD command for the buffer index is
+ * appended to the command buffer, the bookkeeping struct is freed and
+ * dma.nr_released_bufs is incremented.
+ *
+ * `caller` is only used in debug output.
+ */
+void r200ReleaseDmaRegion( r200ContextPtr rmesa,
+                            struct r200_dma_region *region,
+                            const char *caller )
+{
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+   
+   if (!region->buf)
+      return;
+
+   /* Run the pending flush hook before the buffer can be discarded. */
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (--region->buf->refcount == 0) {
+      drmRadeonCmdHeader *cmd;
+
+      if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+        fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
+                region->buf->buf->idx);  
+      
+      cmd = (drmRadeonCmdHeader *)r200AllocCmdBuf( rmesa, sizeof(*cmd), 
+                                                    __FUNCTION__ );
+      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+      cmd->dma.buf_idx = region->buf->buf->idx;
+      FREE(region->buf);
+      rmesa->dma.nr_released_bufs++;
+   }
+
+   /* Only buf and start are cleared; end, ptr and address keep their old
+    * values.
+    */
+   region->buf = 0;
+   region->start = 0;
+}
+
+/* Allocates a region from rmesa->dma.current.  If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ *
+ * On return `region` covers `bytes` bytes starting at the aligned start
+ * offset in the current buffer and holds one reference on that buffer.
+ * Any buffer previously held by `region` is released first.
+ *
+ * `alignment` is decremented and then used as a bit mask, so the rounding
+ * only works for power-of-two values - TODO confirm that all callers
+ * comply (0 would mask the offset down to 0).
+ */
+void r200AllocDmaRegion( r200ContextPtr rmesa, 
+                          struct r200_dma_region *region,
+                          int bytes,
+                          int alignment )
+{
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (region->buf)
+      r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ );
+
+   /* Round the allocation pointer up to the requested alignment. */
+   alignment--;
+   rmesa->dma.current.start = rmesa->dma.current.ptr = 
+      (rmesa->dma.current.ptr + alignment) & ~alignment;
+
+   /* A refill resets start and ptr to 0 in the new buffer. */
+   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+      r200RefillCurrentDmaRegion( rmesa );
+
+   region->start = rmesa->dma.current.start;
+   region->ptr = rmesa->dma.current.start;
+   region->end = rmesa->dma.current.start + bytes;
+   region->address = rmesa->dma.current.address;
+   region->buf = rmesa->dma.current.buf;
+   region->buf->refcount++;
+
+   /* Advance past the new region and keep the pointer 8-byte aligned. */
+   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+   rmesa->dma.current.start = 
+      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
+
+   assert( rmesa->dma.current.ptr <= rmesa->dma.current.end );
+}
+
+/* Allocate a DMA region large enough for `numverts` vertices of
+ * `vertsize` bytes each; a convenience wrapper for r200AllocDmaRegion().
+ */
+void r200AllocDmaRegionVerts( r200ContextPtr rmesa, 
+                               struct r200_dma_region *region,
+                               int numverts,
+                               int vertsize,
+                               int alignment )
+{
+   const int total_bytes = vertsize * numverts;
+
+   r200AllocDmaRegion( rmesa, region, total_bytes, alignment );
+}
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+
+/* Ask the kernel for RADEON_PARAM_LAST_FRAME through DRM_RADEON_GETPARAM
+ * and return the value it stores.  Prints a message and exits the process
+ * if the ioctl fails.
+ */
+static CARD32 r200GetLastFrame(r200ContextPtr rmesa)
+{
+   CARD32 last_frame;
+   drmRadeonGetParam request;
+   int err;
+
+   request.param = RADEON_PARAM_LAST_FRAME;
+   request.value = (int *)&last_frame;
+
+   err = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
+                              &request, sizeof(request) );
+   if ( err != 0 ) {
+      fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, err );
+      exit(1);
+   }
+
+   return last_frame;
+}
+
+/* Issue DRM_RADEON_IRQ_EMIT, with the kernel storing the resulting
+ * sequence number in rmesa->iw.irq_seq for a later r200WaitIrq().
+ * Prints a message and exits the process if the ioctl fails.
+ */
+static void r200EmitIrqLocked( r200ContextPtr rmesa )
+{
+   drmRadeonIrqEmit emit;
+   int err;
+
+   emit.irq_seq = &rmesa->iw.irq_seq;
+
+   err = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
+                              &emit, sizeof(emit) );
+   if ( err == 0 )
+      return;
+
+   fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, err );
+   exit(1);
+}
+
+
+/* Issue DRM_RADEON_IRQ_WAIT for the request stored in rmesa->iw.  The
+ * ioctl is repeated for as long as it fails with errno set to EINTR or
+ * EAGAIN; any other failure prints a message and exits the process.
+ */
+static void r200WaitIrq( r200ContextPtr rmesa )
+{
+   int err;
+
+   for (;;) {
+      err = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
+                             &rmesa->iw, sizeof(rmesa->iw) );
+      if ( !err )
+         break;
+      if ( errno != EINTR && errno != EAGAIN )
+         break;
+   }
+
+   if ( err ) {
+      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, err );
+      exit(1);
+   }
+}
+
+
+/* Block until the frame number reported by the kernel has caught up with
+ * sarea->last_frame.
+ *
+ * Callers in this file invoke it with the hardware lock held; the lock is
+ * dropped and re-taken around the IRQ wait and around each sleep, and is
+ * held again on return.
+ *
+ * With do_irqs set:
+ *   - if the hardware is behind and no IRQ is outstanding, spin on
+ *     r200GetLastFrame(); otherwise wait for the previously emitted IRQ;
+ *   - having had to wait, re-arm irqsEmitted to 10;
+ *   - while irqsEmitted is non-zero, emit a new IRQ and count it down.
+ * Without do_irqs: poll, sleeping between polls when do_usleeps is set.
+ */
+static void r200WaitForFrameCompletion( r200ContextPtr rmesa )
+{
+   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+
+   if (rmesa->do_irqs) {
+      if (r200GetLastFrame(rmesa) < sarea->last_frame) {
+        if (!rmesa->irqsEmitted) {
+           /* Nothing to wait on yet: busy-wait with the lock held. */
+           while (r200GetLastFrame (rmesa) < sarea->last_frame)
+              ;
+        }
+        else {
+           UNLOCK_HARDWARE( rmesa ); 
+           r200WaitIrq( rmesa );       
+           LOCK_HARDWARE( rmesa ); 
+        }
+        rmesa->irqsEmitted = 10;
+      }
+
+      if (rmesa->irqsEmitted) {
+        r200EmitIrqLocked( rmesa );
+        rmesa->irqsEmitted--;
+      }
+   } 
+   else {
+      while (r200GetLastFrame (rmesa) < sarea->last_frame) {
+        UNLOCK_HARDWARE( rmesa ); 
+        if (rmesa->do_usleeps) 
+           DO_USLEEP( 1 );
+        LOCK_HARDWARE( rmesa ); 
+      }
+   }
+}
+
+
+
+/* Copy the back color buffer to the front color buffer.
+ *
+ * The context is taken from dPriv->driContextPriv->driverPrivate.  The
+ * function fires pending vertices, throttles on the previous frame, waits
+ * for vblank with the lock dropped, then issues DRM_RADEON_SWAP once per
+ * batch of at most RADEON_NR_SAREA_CLIPRECTS cliprects copied into the
+ * SAREA.  A failing swap ioctl exits the process.  Afterwards the swap
+ * statistics are updated and the CPU is yielded.
+ */
+void r200CopyBuffer( const __DRIdrawablePrivate *dPriv )
+{
+   r200ContextPtr rmesa;
+   GLint nbox, i, ret;
+   GLboolean   missed_target;
+   int64_t     ust;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( R200_DEBUG & DEBUG_IOCTL ) {
+      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, rmesa->glCtx );
+   }
+
+   R200_FIREVERTICES( rmesa );
+
+   LOCK_HARDWARE( rmesa );
+
+
+   /* Throttle the frame rate -- only allow one pending swap buffers
+    * request at a time.
+    */
+   r200WaitForFrameCompletion( rmesa );
+   /* The vblank wait is done without the hardware lock held. */
+   UNLOCK_HARDWARE( rmesa );
+   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
+   LOCK_HARDWARE( rmesa );
+
+   nbox = dPriv->numClipRects; /* must be in locked region */
+
+   /* `i` is advanced by the inner loop, one SAREA-sized batch per pass. */
+   for ( i = 0 ; i < nbox ; ) {
+      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
+      XF86DRIClipRectPtr box = dPriv->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      GLint n = 0;
+
+      for ( ; i < nr ; i++ ) {
+        *b++ = box[i];
+        n++;
+      }
+      rmesa->sarea->nbox = n;
+
+      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+
+      if ( ret ) {
+        fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret );
+        UNLOCK_HARDWARE( rmesa );
+        exit( 1 );
+      }
+   }
+
+   UNLOCK_HARDWARE( rmesa );
+   rmesa->lost_context = 1;
+
+   /* Swap statistics: on a missed vblank target, record the UST delta
+    * since the previous swap.
+    */
+   rmesa->swap_count++;
+   (*rmesa->get_ust)( & ust );
+   if ( missed_target ) {
+      rmesa->swap_missed_count++;
+      rmesa->swap_missed_ust = ust - rmesa->swap_ust;
+   }
+
+   rmesa->swap_ust = ust;
+
+   sched_yield();
+}
+
+/* Swap buffers by page flipping (DRM_RADEON_FLIP) instead of copying.
+ *
+ * Returns early, after a 10 ms sleep, when the drawable has no cliprects.
+ * Otherwise it throttles on the previous frame, waits for vblank with the
+ * lock dropped, issues the flip (exiting the process on failure), updates
+ * the swap statistics and finally repoints the colour buffer offset and
+ * pitch at the front or back buffer according to sarea->pfCurrentPage.
+ */
+void r200PageFlip( const __DRIdrawablePrivate *dPriv )
+{
+   r200ContextPtr rmesa;
+   GLint ret;
+   GLboolean   missed_target;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( R200_DEBUG & DEBUG_IOCTL ) {
+      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+             rmesa->sarea->pfCurrentPage);
+   }
+
+   R200_FIREVERTICES( rmesa );
+   LOCK_HARDWARE( rmesa );
+
+   if (!dPriv->numClipRects) {
+      UNLOCK_HARDWARE( rmesa );
+      usleep( 10000 );         /* throttle invisible client 10ms */
+      return;
+   }
+
+   /* Need to do this for the perf box placement:
+    */
+   {
+      XF86DRIClipRectPtr box = dPriv->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      b[0] = box[0];
+      rmesa->sarea->nbox = 1;
+   }
+
+   /* Throttle the frame rate -- only allow a few pending swap buffers
+    * request at a time.
+    */
+   r200WaitForFrameCompletion( rmesa );
+   /* The vblank wait is done without the hardware lock held. */
+   UNLOCK_HARDWARE( rmesa );
+   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
+   if ( missed_target ) {
+      rmesa->swap_missed_count++;
+      (void) (*rmesa->get_ust)( & rmesa->swap_missed_ust );
+   }
+   LOCK_HARDWARE( rmesa );
+
+   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
+
+   UNLOCK_HARDWARE( rmesa );
+
+   if ( ret ) {
+      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+      exit( 1 );
+   }
+
+   rmesa->swap_count++;
+   (void) (*rmesa->get_ust)( & rmesa->swap_ust );
+
+   /* Pick the buffer to draw into next from the page reported in the
+    * SAREA after the flip.
+    */
+   if ( rmesa->sarea->pfCurrentPage == 1 ) {
+        rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
+        rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
+   } else {
+        rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
+        rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
+   }
+
+   /* Mark the ctx atom as changed and store the new target in it. */
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
+}
+
+
+/* ================================================================
+ * Buffer clear
+ */
+static void r200Clear( GLcontext *ctx, GLbitfield mask, GLboolean all,
+                        GLint cx, GLint cy, GLint cw, GLint ch )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   GLuint flags = 0;
+   GLuint color_mask = 0;
+   GLint ret, i;
+
+   if ( R200_DEBUG & DEBUG_IOCTL ) {
+      fprintf( stderr, "%s:  all=%d cx=%d cy=%d cw=%d ch=%d\n",
+              __FUNCTION__, all, cx, cy, cw, ch );
+   }
+
+   {
+      LOCK_HARDWARE( rmesa );
+      UNLOCK_HARDWARE( rmesa );
+      if ( dPriv->numClipRects == 0 ) 
+        return;
+   }
+
+   r200EmitState( rmesa );
+
+   /* Need to cope with lostcontext here as kernel relies on
+    * some residual state:
+    */
+   R200_FIREVERTICES( rmesa ); 
+
+   if ( mask & DD_FRONT_LEFT_BIT ) {
+      flags |= RADEON_FRONT;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      mask &= ~DD_FRONT_LEFT_BIT;
+   }
+
+   if ( mask & DD_BACK_LEFT_BIT ) {
+      flags |= RADEON_BACK;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      mask &= ~DD_BACK_LEFT_BIT;
+   }
+
+   if ( mask & DD_DEPTH_BIT ) {
+      if ( ctx->Depth.Mask ) flags |= RADEON_DEPTH; /* FIXME: ??? */
+      mask &= ~DD_DEPTH_BIT;
+   }
+
+   if ( (mask & DD_STENCIL_BIT) && rmesa->state.stencil.hwBuffer ) {
+      flags |= RADEON_STENCIL;
+      mask &= ~DD_STENCIL_BIT;
+   }
+
+   if ( mask ) {
+      if (R200_DEBUG & DEBUG_FALLBACKS)
+        fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
+   }
+
+   if ( !flags ) 
+      return;
+
+   /* Flip top to bottom */
+   cx += dPriv->x;
+   cy  = dPriv->y + dPriv->h - cy - ch;
+
+   LOCK_HARDWARE( rmesa );
+
+   /* Throttle the number of clear ioctls we do.
+    */
+   while ( 1 ) {
+      drmRadeonGetParam gp;
+      int ret;
+      int clear;
+
+      gp.param = RADEON_PARAM_LAST_CLEAR;
+      gp.value = (int *)&clear;
+      ret = drmCommandWriteRead( rmesa->dri.fd,
+                     DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+
+      if ( ret ) {
+        fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+        exit(1);
+      }
+
+      /* Clear throttling needs more thought.
+       */
+      if ( rmesa->sarea->last_clear - clear <= 25 ) {
+        break;
+      }
+      
+      if (rmesa->do_usleeps) {
+        UNLOCK_HARDWARE( rmesa );
+        DO_USLEEP( 1 );
+        LOCK_HARDWARE( rmesa );
+      }
+   }
+
+
+   for ( i = 0 ; i < dPriv->numClipRects ; ) {
+      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+      XF86DRIClipRectPtr box = dPriv->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drmRadeonClearType clear;
+      drmRadeonClearRect depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+      GLint n = 0;
+
+      if ( !all ) {
+        for ( ; i < nr ; i++ ) {
+           GLint x = box[i].x1;
+           GLint y = box[i].y1;
+           GLint w = box[i].x2 - x;
+           GLint h = box[i].y2 - y;
+
+           if ( x < cx ) w -= cx - x, x = cx;
+           if ( y < cy ) h -= cy - y, y = cy;
+           if ( x + w > cx + cw ) w = cx + cw - x;
+           if ( y + h > cy + ch ) h = cy + ch - y;
+           if ( w <= 0 ) continue;
+           if ( h <= 0 ) continue;
+
+           b->x1 = x;
+           b->y1 = y;
+           b->x2 = x + w;
+           b->y2 = y + h;
+           b++;
+           n++;
+        }
+      } else {
+        for ( ; i < nr ; i++ ) {
+           *b++ = box[i];
+           n++;
+        }
+      }
+
+      rmesa->sarea->nbox = n;
+
+      clear.flags       = flags;
+      clear.clear_color = rmesa->state.color.clear;
+      clear.clear_depth = 0;   /* not used */
+      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      clear.depth_mask  = rmesa->state.stencil.clear;
+      clear.depth_boxes = depth_boxes;
+
+      n--;
+      b = rmesa->sarea->boxes;
+      for ( ; n >= 0 ; n-- ) {
+        depth_boxes[n].f[RADEON_CLEAR_X1] = (float)b[n].x1;
+        depth_boxes[n].f[RADEON_CLEAR_Y1] = (float)b[n].y1;
+        depth_boxes[n].f[RADEON_CLEAR_X2] = (float)b[n].x2;
+        depth_boxes[n].f[RADEON_CLEAR_Y2] = (float)b[n].y2;
+        depth_boxes[n].f[RADEON_CLEAR_DEPTH] = ctx->Depth.Clear;
+      }
+
+      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
+                            &clear, sizeof(drmRadeonClearType));
+
+
+      if ( ret ) {
+        UNLOCK_HARDWARE( rmesa );
+        fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+        exit( 1 );
+      }
+   }
+
+   UNLOCK_HARDWARE( rmesa );
+   rmesa->lost_context = 1;
+}
+
+
+/* Poll the kernel with DRM_RADEON_CP_IDLE until it reports success.
+ * The request is issued at most 100 times, with a DO_USLEEP( 1 ) after
+ * every failed attempt.  If the final attempt still returned a negative
+ * status, the hardware lock is released and the process exits.
+ *
+ * The UNLOCK_HARDWARE on the failure path implies the caller must
+ * already hold the hardware lock when calling this function.
+ */
+void r200WaitForIdleLocked( r200ContextPtr rmesa )
+{
+    int status;
+    int attempts = 0;
+
+    for (;;) {
+       status = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE);
+       if (!status)
+         break;
+
+       /* Not idle yet: back off briefly, give up after 100 tries. */
+       DO_USLEEP( 1 );
+       if (++attempts >= 100)
+         break;
+    }
+
+    if ( status < 0 ) {
+       UNLOCK_HARDWARE( rmesa );
+       fprintf( stderr, "Error: R200 timed out... exiting\n" );
+       exit( -1 );
+    }
+}
+
+
+/* Convenience wrapper for callers that do not hold the hardware lock:
+ * take the lock, run r200WaitForIdleLocked(), and release the lock
+ * again.
+ */
+static void r200WaitForIdle( r200ContextPtr rmesa )
+{
+   LOCK_HARDWARE(rmesa);
+   r200WaitForIdleLocked( rmesa );
+   UNLOCK_HARDWARE(rmesa);
+}
+
+
+/* Driver.Flush hook.  In order: run the pending dma flush callback if
+ * one is installed, emit state if the dirty list is not empty, and
+ * flush the command buffer if it holds any data.
+ */
+void r200Flush( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+
+   if (R200_DEBUG & DEBUG_IOCTL) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   /* Close off whatever primitive is still open. */
+   R200_NEWPRIM( rmesa );
+
+   if (!is_empty_list(&rmesa->hw.dirty)) {
+      r200EmitState( rmesa );
+   }
+
+   if (rmesa->store.cmd_used) {
+      r200FlushCmdBuf( rmesa, __FUNCTION__ );
+   }
+}
+
+/* Driver.Finish hook: flush everything that is pending, then block
+ * until the hardware has completed it.  When irqs are enabled
+ * (rmesa->do_irqs) an irq is emitted under the hardware lock and
+ * waited for after the lock is released; otherwise the driver falls
+ * back to polling for idle.
+ */
+void r200Finish( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   r200Flush( ctx );
+
+   if (!rmesa->do_irqs) {
+      r200WaitForIdle( rmesa );
+      return;
+   }
+
+   LOCK_HARDWARE( rmesa );
+   r200EmitIrqLocked( rmesa );
+   UNLOCK_HARDWARE( rmesa );
+
+   r200WaitIrq( rmesa );
+}
+
+
+/* This version of AllocateMemoryNV allocates only agp memory, and
+ * only does so after the point at which the driver has been
+ * initialized.
+ *
+ * A current context is required: it supplies the screen whose device
+ * fd the DRM_RADEON_ALLOC request is sent to.
+ *
+ * Returns NULL when there is no current context, the screen is PCI,
+ * the R200_NO_ALLOC environment variable is set, the drm minor version
+ * is below 6, or the kernel rejects the request.  Otherwise returns
+ * the address inside the mapped agp texture region at the offset the
+ * kernel handed back.
+ *
+ * NOTE(review): earlier commentary said the hardware lock protects the
+ * kernel data structures, but no lock is taken in this function --
+ * confirm whether that is intended.
+ */
+void *r200AllocateMemoryNV(GLsizei size, GLfloat readfreq,
+                           GLfloat writefreq, GLfloat priority)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa;
+   drmRadeonMemAlloc request;
+   int agp_offset;
+   int status;
+
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, 
+             writefreq, priority);
+
+   if (!ctx)
+      return NULL;
+
+   rmesa = R200_CONTEXT(ctx);
+   if (!rmesa || rmesa->r200Screen->IsPCI)
+      return NULL;
+
+   if (getenv("R200_NO_ALLOC") || rmesa->dri.drmMinor < 6)
+      return NULL;
+
+   /* The kernel writes the allocation's offset through region_offset. */
+   request.region_offset = &agp_offset;
+   request.size = size;
+   request.alignment = 0;
+   request.region = RADEON_MEM_REGION_AGP;
+
+   status = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd,
+                                DRM_RADEON_ALLOC,
+                                &request, sizeof(request));
+   if (status) {
+      fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, status);
+      return NULL;
+   }
+
+   return (void *)((char *)rmesa->r200Screen->agpTextures.map + agp_offset);
+}
+
+
+/* Called via glXFreeMemoryNV().
+ *
+ * Translates `pointer' back into an offset within the mapped agp
+ * texture region and asks the kernel to release it with
+ * DRM_RADEON_FREE.  Nothing is freed (and a diagnostic is printed
+ * where the code below does so) when there is no usable context, the
+ * drm minor version is below 6, or the pointer lies outside the
+ * region.
+ *
+ * The range check is done on pointers / the full-width pointer
+ * difference rather than on an `int': narrowing the difference first
+ * would let an address far outside the region alias to a small,
+ * seemingly valid offset on 64-bit hosts.
+ */
+void r200FreeMemoryNV(GLvoid *pointer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa;
+   const char *target = (const char *)pointer;
+   const char *region_start;
+   drmRadeonMemFree memfree;
+   int ret;
+
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
+
+   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || rmesa->r200Screen->IsPCI ) {
+      fprintf(stderr, "%s: no context\n", __FUNCTION__);
+      return;
+   }
+
+   if (rmesa->dri.drmMinor < 6) 
+      return;
+
+   region_start = (const char *)rmesa->r200Screen->agpTextures.map;
+
+   if (target < region_start || 
+       target - region_start > rmesa->r200Screen->agpTextures.size) {
+      fprintf(stderr, "offset %ld outside range 0..%d\n",
+             (long)(target - region_start),
+             rmesa->r200Screen->agpTextures.size);
+      return;
+   }
+
+   memfree.region = RADEON_MEM_REGION_AGP;
+   /* Validated above, so the narrowing to the ioctl's field is safe. */
+   memfree.region_offset = (int)(target - region_start);
+   
+   ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd,
+                         DRM_RADEON_FREE,
+                         &memfree, sizeof(memfree));
+   
+   if (ret) 
+      fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, ret);
+}
+
+/* Called via glXGetAGPOffsetMESA().
+ *
+ * Returns the offset of `pointer' relative to the screen's agp_base,
+ * or ~0 when there is no current context, the pointer is not inside
+ * the agp texture region, or the drm minor version is below 6.
+ */
+GLuint r200GetAGPOffset(const GLvoid *pointer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = ctx ? R200_CONTEXT(ctx) : NULL;
+
+   if (!rmesa) {
+      fprintf(stderr, "%s: no context\n", __FUNCTION__);
+      return ~0;
+   }
+
+   if (!r200IsAgpMemory( rmesa, pointer, 0 ) || rmesa->dri.drmMinor < 6)
+      return ~0;
+
+   return r200AgpOffsetFromVirtual( rmesa, pointer )
+      - rmesa->r200Screen->agp_base;
+}
+
+
+/* Report whether the `size' bytes starting at `pointer' lie inside the
+ * mapped agp texture region.  A negative size is never valid.
+ *
+ * The test is performed on the pointers and on the full-width pointer
+ * difference instead of first narrowing the difference to `int':
+ * on 64-bit hosts that narrowing could make an unrelated address look
+ * like a small in-range offset, and `offset + size' could overflow.
+ */
+GLboolean r200IsAgpMemory( r200ContextPtr rmesa, const GLvoid *pointer,
+                          GLint size )
+{
+   const char *region_start = (const char *)rmesa->r200Screen->agpTextures.map;
+   const char *target = (const char *)pointer;
+   int valid = (size >= 0 &&
+               target >= region_start &&
+               (target - region_start) + size <
+                  rmesa->r200Screen->agpTextures.size);
+
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "r200IsAgpMemory( %p ) : %d\n", pointer, valid );
+   
+   return valid;
+}
+
+
+/* Convert a virtual address inside the mapped agp texture region to
+ * agp_texture_offset plus its distance from the start of the mapping.
+ * Returns ~0 when the pointer is outside the region.
+ *
+ * The range check uses the pointers themselves rather than an `int'
+ * difference, which would be truncated on 64-bit hosts and could
+ * accept an address that is nowhere near the region.
+ */
+GLuint r200AgpOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
+{
+   const char *region_start = (const char *)rmesa->r200Screen->agpTextures.map;
+   const char *target = (const char *)pointer;
+
+   if (target < region_start ||
+       target - region_start > rmesa->r200Screen->agpTextures.size)
+      return ~0;
+   else
+      return rmesa->r200Screen->agp_texture_offset +
+         (GLuint)(target - region_start);
+}
+
+
+
+/* Install this file's Clear, Flush and Finish implementations into the
+ * context's driver function table.
+ */
+void r200InitIoctlFuncs( GLcontext *ctx )
+{
+    ctx->Driver.Flush  = r200Flush;
+    ctx->Driver.Finish = r200Finish;
+    ctx->Driver.Clear  = r200Clear;
+}
+
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
new file mode 100644 (file)
index 0000000..5025c1d
--- /dev/null
@@ -0,0 +1,191 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_IOCTL_H__
+#define __R200_IOCTL_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "simple_list.h"
+#include "radeon_dri.h"
+#include "r200_lock.h"
+
+#include "xf86drm.h"
+#include "radeon_common.h"
+
+extern void r200EmitState( r200ContextPtr rmesa );
+extern void r200EmitVertexAOS( r200ContextPtr rmesa,
+                                GLuint vertex_size,
+                                GLuint offset );
+
+extern void r200EmitVbufPrim( r200ContextPtr rmesa,
+                               GLuint primitive,
+                               GLuint vertex_nr );
+
+extern void r200FlushElts( r200ContextPtr rmesa );
+
+extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+                                          GLuint primitive,
+                                          GLuint min_nr );
+
+extern void r200EmitAOS( r200ContextPtr rmesa,
+                          struct r200_dma_region **regions,
+                          GLuint n,
+                          GLuint offset );
+
+extern void r200EmitBlit( r200ContextPtr rmesa,
+                         GLuint color_fmt,
+                         GLuint src_pitch,
+                         GLuint src_offset,
+                         GLuint dst_pitch,
+                         GLuint dst_offset,
+                         GLint srcx, GLint srcy,
+                         GLint dstx, GLint dsty,
+                         GLuint w, GLuint h );
+
+extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags );
+
+extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * );
+extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller );
+
+extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa );
+
+extern void r200AllocDmaRegion( r200ContextPtr rmesa,
+                                 struct r200_dma_region *region,
+                                 int bytes, 
+                                 int alignment );
+
+extern void r200AllocDmaRegionVerts( r200ContextPtr rmesa,
+                                      struct r200_dma_region *region,
+                                      int numverts,
+                                      int vertsize, 
+                                      int alignment );
+
+extern void r200ReleaseDmaRegion( r200ContextPtr rmesa,
+                                   struct r200_dma_region *region,
+                                   const char *caller );
+
+extern void r200CopyBuffer( const __DRIdrawablePrivate *drawable );
+extern void r200PageFlip( const __DRIdrawablePrivate *drawable );
+/* Run the pending dma flush, emit dirty state, flush the command buffer. */
+extern void r200Flush( GLcontext *ctx );
+/* r200Flush() followed by a wait for the hardware to complete. */
+extern void r200Finish( GLcontext *ctx );
+/* Poll DRM_RADEON_CP_IDLE; exits the process if the poll keeps failing. */
+extern void r200WaitForIdleLocked( r200ContextPtr rmesa );
+extern void r200WaitForVBlank( r200ContextPtr rmesa );
+/* Install the Clear/Finish/Flush driver hooks. */
+extern void r200InitIoctlFuncs( GLcontext *ctx );
+
+/* AGP memory allocation entry points; all of them need a current
+ * context and return NULL / ~0 / do nothing when it is unavailable.
+ */
+extern void *r200AllocateMemoryNV( GLsizei size, GLfloat readfreq,
+                                  GLfloat writefreq, GLfloat priority );
+extern void r200FreeMemoryNV( GLvoid *pointer );
+extern GLuint r200GetAGPOffset( const GLvoid *pointer );
+/* True when [pointer, pointer + size) lies in the agp texture mapping. */
+extern GLboolean r200IsAgpMemory( r200ContextPtr rmesa, const GLvoid *pointer,
+                                 GLint size );
+
+/* Returns ~0 when the pointer is outside the agp texture mapping. */
+extern GLuint r200AgpOffsetFromVirtual( r200ContextPtr rmesa, 
+                                       const GLvoid *pointer );
+
+/* ================================================================
+ * Helper macros:
+ */
+
+/* Close off the last primitive, if it exists.
+ */
+#define R200_NEWPRIM( rmesa )                  \
+do {                                           \
+   if ( rmesa->dma.flush )                     \
+      rmesa->dma.flush( rmesa );       \
+} while (0)
+
+/* Can accomodate several state changes and primitive changes without
+ * actually firing the buffer.
+ */
+#define R200_STATECHANGE( rmesa, ATOM )                        \
+do {                                                           \
+   R200_NEWPRIM( rmesa );                                      \
+   move_to_head( &(rmesa->hw.dirty), &(rmesa->hw.ATOM));       \
+} while (0)
+
+/* Copy the ATOM's current command words into its `lastcmd' buffer.
+ *
+ * This macro takes no context argument: it expands to references to a
+ * variable named `rmesa', which must be in scope at the point of use.
+ * cmd_size is multiplied by 4 to form the byte count (presumably the
+ * size of one command dword -- confirm against the atom definition).
+ */
+#define R200_DB_STATE( ATOM )                          \
+   memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \
+          rmesa->hw.ATOM.cmd_size * 4)
+
+/* Compare the atom's command buffer with its `lastcmd' copy.
+ *
+ * Returns 0 and changes nothing when the two are identical.
+ * Otherwise closes off the current primitive, moves the atom to the
+ * head of the dirty list, exchanges the cmd/lastcmd pointers and
+ * returns 1.
+ */
+static __inline int R200_DB_STATECHANGE( 
+   r200ContextPtr rmesa,
+   struct r200_state_atom *atom )
+{
+   int *swap;
+
+   if (!memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4))
+      return 0;
+
+   R200_NEWPRIM( rmesa );
+   move_to_head( &(rmesa->hw.dirty), atom );
+
+   /* Exchange the two buffers. */
+   swap = atom->lastcmd;
+   atom->lastcmd = atom->cmd;
+   atom->cmd = swap;
+
+   return 1;
+}
+
+
+/* Fire the buffered vertices no matter what.
+ */
+#define R200_FIREVERTICES( rmesa )                     \
+do {                                                   \
+   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {  \
+      r200Flush( rmesa->glCtx );                       \
+   }                                                   \
+} while (0)
+
+/* Reserve `bytes' bytes in the command buffer and return a pointer to
+ * the start of the reserved space.  If the request does not fit behind
+ * the data already buffered, the buffer is flushed first; `where' is
+ * handed to r200FlushCmdBuf() in that case.
+ */
+static __inline char *r200AllocCmdBuf( r200ContextPtr rmesa,
+                                        int bytes, const char *where )
+{
+   char *start;
+
+   if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ) {
+      r200FlushCmdBuf( rmesa, where );
+   }
+
+   start = &rmesa->store.cmd_buf[rmesa->store.cmd_used];
+   rmesa->store.cmd_used += bytes;
+
+   assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
+   return start;
+}
+
+
+
+
+#endif
+#endif /* __R200_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c
new file mode 100644 (file)
index 0000000..c2c5d30
--- /dev/null
@@ -0,0 +1,116 @@
+/* $XFree86$ */
+/**************************************************************************
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "r200_context.h"
+#include "r200_lock.h"
+#include "r200_tex.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+
+#if DEBUG_LOCKING
+/* Lock-debugging bookkeeping, compiled only when DEBUG_LOCKING is
+ * non-zero: the source file and line associated with the currently
+ * held lock (NULL / 0 when none is recorded).
+ */
+char *prevLockFile = NULL;
+int prevLockLine = 0;
+#endif
+
+/* Turn on/off page flipping according to the flags in the sarea, and
+ * point the colour buffer registers at the buffer to draw to.
+ *
+ * The back buffer is selected when exactly one of the following
+ * holds: the GL draw destination is BACK_LEFT, or the sarea reports
+ * page 1 as the current page.
+ */
+static void
+r200UpdatePageFlipping( r200ContextPtr rmesa )
+{
+   const int draw_to_back =
+      (rmesa->glCtx->Color._DrawDestMask == BACK_LEFT_BIT);
+   const int page_flipped = (rmesa->sarea->pfCurrentPage == 1);
+
+   rmesa->doPageFlip = rmesa->sarea->pfAllowPageFlip;
+
+   if (draw_to_back != page_flipped) {
+        rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
+        rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
+   } else {
+        rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
+        rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
+   }
+
+   /* Mark the ctx atom dirty, then store the new register values. */
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset;
+}
+
+
+
+/* Update the hardware state.  This is called if another context has
+ * grabbed the hardware lock, which includes the X server.  This
+ * function also updates the driver's window state after the X server
+ * moves, resizes or restacks a window -- the change will be reflected
+ * in the drawable position and clip rects.  Since the X server grabs
+ * the hardware lock when it changes the window state, this routine will
+ * automatically be called after such a change.
+ *
+ * rmesa: context taking the lock.
+ * flags: passed straight through to drmGetLock().
+ */
+void r200GetLock( r200ContextPtr rmesa, GLuint flags )
+{
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   /* DRI_VALIDATE_DRAWABLE_INFO is handed the screen private as its
+    * first argument, so it has to be declared here.
+    */
+   __DRIscreenPrivate *sPriv = rmesa->dri.screen;
+   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   int i;
+
+   drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags );
+
+   /* The window might have moved, so we might need to get new clip
+    * rects.
+    *
+    * NOTE: This releases and regrabs the hw lock to allow the X server
+    * to respond to the DRI protocol request for new drawable info.
+    * Since the hardware state depends on having the latest drawable
+    * clip rects, all state checking must be done _after_ this call.
+    */
+   DRI_VALIDATE_DRAWABLE_INFO( sPriv, dPriv );
+
+   /* Drawable changed since we last looked: refresh everything that
+    * depends on its position and clip rects.
+    */
+   if ( rmesa->lastStamp != dPriv->lastStamp ) {
+      r200UpdatePageFlipping( rmesa );
+      if (rmesa->glCtx->Color._DrawDestMask == BACK_LEFT_BIT)
+         r200SetCliprects( rmesa, GL_BACK_LEFT );
+      else
+         r200SetCliprects( rmesa, GL_FRONT_LEFT );
+      r200UpdateViewportOffset( rmesa->glCtx );
+      rmesa->lastStamp = dPriv->lastStamp;
+   }
+
+   /* Record this context as the sarea's owner. */
+   if ( sarea->ctxOwner != rmesa->dri.hwContext ) {
+      sarea->ctxOwner = rmesa->dri.hwContext;
+   }
+
+   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+      DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
+   }
+}
diff --git a/src/mesa/drivers/dri/r200/r200_lock.h b/src/mesa/drivers/dri/r200/r200_lock.h
new file mode 100644 (file)
index 0000000..59bc1d6
--- /dev/null
@@ -0,0 +1,112 @@
+/* $XFree86$ */
+/**************************************************************************
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_LOCK_H__
+#define __R200_LOCK_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void r200GetLock( r200ContextPtr rmesa, GLuint flags );
+
+/* Turn DEBUG_LOCKING on to find locking conflicts.
+ *
+ * When enabled, LOCK_HARDWARE records the file/line of the lock site
+ * and aborts if a lock site is already recorded; UNLOCK_HARDWARE
+ * clears the record.  When disabled, the three DEBUG_* macros below
+ * expand to nothing.
+ */
+#define DEBUG_LOCKING  0
+
+#if DEBUG_LOCKING
+extern char *prevLockFile;
+extern int prevLockLine;
+
+/* Remember where the lock was taken. */
+#define DEBUG_LOCK()                                                   \
+   do {                                                                        \
+      prevLockFile = (__FILE__);                                       \
+      prevLockLine = (__LINE__);                                       \
+   } while (0)
+
+/* Forget the recorded lock site. */
+#define DEBUG_RESET()                                                  \
+   do {                                                                        \
+      prevLockFile = 0;                                                        \
+      prevLockLine = 0;                                                        \
+   } while (0)
+
+/* Report both lock sites and exit if a lock site is still recorded. */
+#define DEBUG_CHECK_LOCK()                                             \
+   do {                                                                        \
+      if ( prevLockFile ) {                                            \
+        fprintf( stderr,                                               \
+                 "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",    \
+                 prevLockFile, prevLockLine, __FILE__, __LINE__ );     \
+        exit( 1 );                                                     \
+      }                                                                        \
+   } while (0)
+
+#else
+
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif
+
+/*
+ * !!! We may want to separate locks from locks with validation.  This
+ * could be used to improve performance for those commands that do
+ * not do any drawing !!!
+ */
+
+
+/* Lock the hardware and validate our state.
+ */
+#define LOCK_HARDWARE( rmesa )                                 \
+   do {                                                                \
+      char __ret = 0;                                          \
+      DEBUG_CHECK_LOCK();                                      \
+      DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext,                \
+              (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret ); \
+      if ( __ret )                                             \
+        r200GetLock( rmesa, 0 );                               \
+      DEBUG_LOCK();                                            \
+   } while (0)
+
+/* Unlock the hardware.
+ */
+#define UNLOCK_HARDWARE( rmesa )                                       \
+   do {                                                                        \
+      DRM_UNLOCK( rmesa->dri.fd,                                       \
+                 rmesa->dri.hwLock,                                    \
+                 rmesa->dri.hwContext );                               \
+      DEBUG_RESET();                                                   \
+   } while (0)
+
+#endif
+#endif /* __R200_LOCK_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_maos.c b/src/mesa/drivers/dri/r200/r200_maos.c
new file mode 100644 (file)
index 0000000..fd2bd51
--- /dev/null
@@ -0,0 +1,12 @@
+
+
+/* If using new packets, can choose either verts or arrays.
+ * Otherwise, must use verts.
+ *
+ * This file contains no code of its own; it only selects which
+ * implementation file to compile.  R200_MAOS_VERTS is hard-wired to 0
+ * below, so the arrays implementation is used unless R200_OLD_PACKETS
+ * evaluates non-zero.
+ */
+#include "r200_context.h"
+#define R200_MAOS_VERTS 0
+#if (R200_MAOS_VERTS) || (R200_OLD_PACKETS)
+#include "r200_maos_verts.c"
+#else
+#include "r200_maos_arrays.c"
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_maos.h b/src/mesa/drivers/dri/r200/r200_maos.h
new file mode 100644 (file)
index 0000000..0dfcc5c
--- /dev/null
@@ -0,0 +1,48 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_MAOS_H__
+#define __R200_MAOS_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "r200_context.h"
+
+extern void r200EmitArrays( GLcontext *ctx, GLuint inputs );
+extern void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs );
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
new file mode 100644 (file)
index 0000000..f2abaff
--- /dev/null
@@ -0,0 +1,478 @@
+/* $XFree86$ */
+/**************************************************************************
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "imports.h"
+#include "macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_imm_debug.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_swtcl.h"
+#include "r200_maos.h"
+
+/* Usage:
+ *   - from r200_tcl_render
+ *   - call r200EmitArrays to ensure up-to-date arrays in dma
+ *   - emit primitives (new type?) which reference the data
+ *       -- need to use elts for lineloop, quads, quadstrip/flat
+ *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
+ *
+ */
+/* Expand 'count' three-component ubyte colors into the dma region 'rvb'.
+ *
+ * Source colors are read 'stride' bytes apart starting at 'data'; each one
+ * is written as an r200_color_t whose alpha is forced to 0xFF.
+ */
+static void emit_ubyte_rgba3( GLcontext *ctx,
+                      struct r200_dma_region *rvb,
+                      char *data,
+                      int stride,
+                      int count )
+{
+   r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address);
+   int remaining;
+
+   if (R200_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d out %p\n",
+             __FUNCTION__, count, stride, out);
+
+   for (remaining = count; remaining > 0; remaining--) {
+      out->red   = data[0];
+      out->green = data[1];
+      out->blue  = data[2];
+      out->alpha = 0xFF;      /* source has no alpha: make it opaque */
+
+      data += stride;
+      out++;
+   }
+}
+
+
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr )                                    \
+do {                                                                   \
+       int __tmp;                                                      \
+       __asm__ __volatile__( "rep ; movsl"                             \
+                             : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
+                             : "0" (nr),                               \
+                               "D" ((long)dst),                        \
+                               "S" ((long)src) );                      \
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr )            \
+do {                                           \
+   int j;                                      \
+   for ( j = 0 ; j < nr ; j++ )                        \
+      dst[j] = ((int *)src)[j];                        \
+   dst += nr;                                  \
+} while (0)
+#endif
+
+
+
+/* Copy 'count' four-component ubyte colors into the dma region 'rvb'.
+ *
+ * Each color is moved as one 32-bit word passed through LE32_TO_CPU.
+ * Source colors are read 'stride' bytes apart starting at 'data'.
+ */
+static void emit_ubyte_rgba4( GLcontext *ctx,
+                             struct r200_dma_region *rvb,
+                             char *data,
+                             int stride,
+                             int count )
+{
+   int *out = (int *)(rvb->address + rvb->start);
+   int *packed = (int *)data;
+   int n;
+
+   if (R200_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+             __FUNCTION__, count, stride);
+
+   if (stride != 4) {
+      /* Colors are spread out in memory: step the byte pointer. */
+      for (n = 0; n < count; n++) {
+        out[n] = LE32_TO_CPU(*(int *)data);
+        data += stride;
+      }
+      return;
+   }
+
+   /* Tightly packed colors: index the source as an array of words. */
+   for (n = 0; n < count; n++)
+      out[n] = LE32_TO_CPU(packed[n]);
+}
+
+
+/* Allocate dma space in 'rvb' for a packed ubyte color array and fill it.
+ *
+ * 'size' is the number of source components (3 or 4); anything else is
+ * fatal.  A 'stride' of zero means one color is shared by all vertices:
+ * only a single color is emitted and the array-of-structures stride is
+ * set to zero.  'rvb' must not already own a buffer.
+ */
+static void emit_ubyte_rgba( GLcontext *ctx,
+                            struct r200_dma_region *rvb,
+                            char *data,
+                            int size,
+                            int stride,
+                            int count )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+
+   /* Constant color: one element is all that is needed. */
+   if (stride == 0)
+      count = 1;
+
+   /* One dword per packed color. */
+   r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 );  /* alignment? */
+   rvb->aos_start = GET_START(rvb);
+   rvb->aos_stride = (stride == 0) ? 0 : 1;
+   rvb->aos_size = 1;
+
+   /* Emit the data
+    */
+   if (size == 3) {
+      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
+   }
+   else if (size == 4) {
+      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
+   }
+   else {
+      assert(0);
+      exit(1);
+   }
+}
+
+
+
+
+/* Copy 'count' two-dword vectors, read 'stride' bytes apart starting at
+ * 'data', into the dma region 'rvb' as a tightly packed array.
+ */
+static void emit_vec8( GLcontext *ctx,
+                      struct r200_dma_region *rvb,
+                      char *data,
+                      int stride,
+                      int count )
+{
+   int *out = (int *)(rvb->address + rvb->start);
+   int remaining;
+
+   if (R200_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+             __FUNCTION__, count, stride);
+
+   /* Source already packed: a single block copy does the job. */
+   if (stride == 8) {
+      COPY_DWORDS( out, data, count*2 );
+      return;
+   }
+
+   for (remaining = count; remaining > 0; remaining--) {
+      const int *src = (const int *)data;
+
+      out[0] = src[0];
+      out[1] = src[1];
+
+      out += 2;
+      data += stride;
+   }
+}
+
+/* Copy 'count' three-dword vectors, read 'stride' bytes apart starting at
+ * 'data', into the dma region 'rvb' as a tightly packed array.
+ */
+static void emit_vec12( GLcontext *ctx,
+                      struct r200_dma_region *rvb,
+                      char *data,
+                      int stride,
+                      int count )
+{
+   int *out = (int *)(rvb->address + rvb->start);
+   int remaining;
+
+   if (R200_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+             __FUNCTION__, count, stride, out, data);
+
+   /* Source already packed: a single block copy does the job. */
+   if (stride == 12) {
+      COPY_DWORDS( out, data, count*3 );
+      return;
+   }
+
+   for (remaining = count; remaining > 0; remaining--) {
+      const int *src = (const int *)data;
+
+      out[0] = src[0];
+      out[1] = src[1];
+      out[2] = src[2];
+
+      out += 3;
+      data += stride;
+   }
+}
+
+/* Copy 'count' four-dword vectors, read 'stride' bytes apart starting at
+ * 'data', into the dma region 'rvb' as a tightly packed array.
+ */
+static void emit_vec16( GLcontext *ctx,
+                       struct r200_dma_region *rvb,
+                       char *data,
+                       int stride,
+                       int count )
+{
+   int *out = (int *)(rvb->address + rvb->start);
+   int remaining;
+
+   if (R200_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+             __FUNCTION__, count, stride);
+
+   /* Source already packed: a single block copy does the job. */
+   if (stride == 16) {
+      COPY_DWORDS( out, data, count*4 );
+      return;
+   }
+
+   for (remaining = count; remaining > 0; remaining--) {
+      const int *src = (const int *)data;
+
+      out[0] = src[0];
+      out[1] = src[1];
+      out[2] = src[2];
+      out[3] = src[3];
+
+      out += 4;
+      data += stride;
+   }
+}
+
+
+/* Allocate dma space in 'rvb' for an array of 'size'-dword vectors and
+ * fill it from 'data'.
+ *
+ * 'size' must be 2, 3 or 4; anything else is fatal.  A 'stride' of zero
+ * means one vector is shared by all vertices: only a single vector is
+ * emitted and the array-of-structures stride is set to zero.  'rvb' must
+ * not already own a buffer.
+ */
+static void emit_vector( GLcontext *ctx,
+                        struct r200_dma_region *rvb,
+                        char *data,
+                        int size,
+                        int stride,
+                        int count )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d size %d stride %d\n",
+             __FUNCTION__, count, size, stride);
+
+   assert (!rvb->buf);
+
+   /* Constant attribute: one element is all that is needed. */
+   if (stride == 0)
+      count = 1;
+
+   r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 );   /* alignment? */
+   rvb->aos_start = GET_START(rvb);
+   rvb->aos_stride = (stride == 0) ? 0 : size;
+   rvb->aos_size = size;
+
+   /* Emit the data
+    */
+   if (size == 2) {
+      emit_vec8( ctx, rvb, data, stride, count );
+   }
+   else if (size == 3) {
+      emit_vec12( ctx, rvb, data, stride, count );
+   }
+   else if (size == 4) {
+      emit_vec16( ctx, rvb, data, stride, count );
+   }
+   else {
+      assert(0);
+      exit(1);
+   }
+}
+
+
+
+/* Emit any changed arrays to new agp memory, re-emit a packet to
+ * update the arrays.
+ *
+ * The position array is always handled; the other arrays only when their
+ * bit is set in 'inputs'.  An array whose dma region already owns a buffer
+ * is not re-emitted, but it is still added to the component list and to
+ * the vertex format words.  On return rmesa->tcl.aos_components lists the
+ * regions in the order obj, norm, rgba, spec, tex0, tex1 and
+ * rmesa->tcl.nr_aos_components holds how many were listed.
+ */
+void r200EmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+   struct r200_dma_region **component = rmesa->tcl.aos_components;
+   GLuint nr = 0;                      /* number of components listed so far */
+   GLuint vfmt0 = 0, vfmt1 = 0;        /* vertex format words being built */
+   GLuint count = VB->Count;
+   
+   if (R200_DEBUG & DEBUG_VERTS) 
+      _tnl_print_vert_flags( __FUNCTION__, inputs );
+
+   /* Object-space position: emitted unconditionally. */
+   if (1) {
+      if (!rmesa->tcl.obj.buf) 
+        emit_vector( ctx, 
+                     &rmesa->tcl.obj, 
+                     (char *)VB->ObjPtr->data,
+                     VB->ObjPtr->size,
+                     VB->ObjPtr->stride,
+                     count);
+
+      /* The cases deliberately fall through: a 4-component position sets
+       * both the W0 and Z0 bits, a 3-component one only Z0.
+       */
+      switch( VB->ObjPtr->size ) {
+      case 4: vfmt0 |= R200_VTX_W0;
+      case 3: vfmt0 |= R200_VTX_Z0;
+      case 2: 
+      default:
+       break;
+      }
+      component[nr++] = &rmesa->tcl.obj;
+   }
+   
+
+   /* Normals are always emitted as three components. */
+   if (inputs & VERT_BIT_NORMAL) {
+      if (!rmesa->tcl.norm.buf)
+        emit_vector( ctx, 
+                     &(rmesa->tcl.norm), 
+                     (char *)VB->NormalPtr->data,
+                     3,
+                     VB->NormalPtr->stride,
+                     count);
+
+      vfmt0 |= R200_VTX_N0;
+      component[nr++] = &rmesa->tcl.norm;
+   }
+
+   /* Primary color: packed ubytes when the source is ubyte, otherwise
+    * three or four floats.
+    */
+   if (inputs & VERT_BIT_COLOR0) {
+      if (VB->ColorPtr[0]->Type == GL_UNSIGNED_BYTE) {
+        if (!rmesa->tcl.rgba.buf)
+           emit_ubyte_rgba( ctx, 
+                            &rmesa->tcl.rgba, 
+                            (char *)VB->ColorPtr[0]->Ptr,
+                            VB->ColorPtr[0]->Size,
+                            VB->ColorPtr[0]->StrideB,
+                            count);
+
+        vfmt0 |= R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT; 
+      }
+      else {
+        int emitsize;
+
+        /* Alpha is emitted only for 4-component colors, and then only if
+         * the color varies per vertex (non-zero stride) or the single
+         * constant color has an alpha other than 1.0.
+         */
+        if (VB->ColorPtr[0]->Size == 4 &&
+            (VB->ColorPtr[0]->StrideB != 0 ||
+             ((GLfloat *)VB->ColorPtr[0]->Ptr)[3] != 1.0)) { 
+           vfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT; 
+           emitsize = 4;
+        }
+        else { 
+           vfmt0 |= R200_VTX_FP_RGB << R200_VTX_COLOR_0_SHIFT; 
+           emitsize = 3;
+        }
+
+        if (!rmesa->tcl.rgba.buf)
+           emit_vector( ctx, 
+                        &(rmesa->tcl.rgba), 
+                        (char *)VB->ColorPtr[0]->Ptr,
+                        emitsize,
+                        VB->ColorPtr[0]->StrideB,
+                        count);
+      }
+
+      component[nr++] = &rmesa->tcl.rgba;
+   }
+
+
+   /* Secondary color: always emitted as packed ubytes with three source
+    * components, so emit_ubyte_rgba3 forces alpha to 0xFF.
+    */
+   if (inputs & VERT_BIT_COLOR1) {
+      if (!rmesa->tcl.spec.buf) {
+        /* presumably converts the float colors to ubytes in place so the
+         * Ptr/StrideB read below describe ubyte data -- TODO confirm
+         */
+        if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+           r200_import_float_spec_colors( ctx );
+
+        emit_ubyte_rgba( ctx, 
+                         &rmesa->tcl.spec, 
+                         (char *)VB->SecondaryColorPtr[0]->Ptr,
+                         3,
+                         VB->SecondaryColorPtr[0]->StrideB,
+                         count);
+      }
+
+      /* How does this work?
+       */
+      vfmt0 |= R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT; 
+      component[nr++] = &rmesa->tcl.spec;
+   }
+
+/*    vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] & */
+/*       ~(R200_TCL_VTX_Q0|R200_TCL_VTX_Q1)); */
+      
+   /* Texture coordinates: the component count of each unit goes into the
+    * second format word.
+    */
+   if (inputs & VERT_BIT_TEX0) {
+      if (!rmesa->tcl.tex[0].buf)
+        emit_vector( ctx, 
+                     &(rmesa->tcl.tex[0]), 
+                     (char *)VB->TexCoordPtr[0]->data,
+                     VB->TexCoordPtr[0]->size,
+                     VB->TexCoordPtr[0]->stride,
+                     count );
+
+      vfmt1 |= VB->TexCoordPtr[0]->size << R200_VTX_TEX0_COMP_CNT_SHIFT;
+      component[nr++] = &rmesa->tcl.tex[0];
+   }
+
+   if (inputs & VERT_BIT_TEX1) {
+      if (!rmesa->tcl.tex[1].buf)
+        emit_vector( ctx, 
+                     &(rmesa->tcl.tex[1]), 
+                     (char *)VB->TexCoordPtr[1]->data,
+                     VB->TexCoordPtr[1]->size,
+                     VB->TexCoordPtr[1]->stride,
+                     count );
+        
+      vfmt1 |= VB->TexCoordPtr[1]->size << R200_VTX_TEX1_COMP_CNT_SHIFT;
+      component[nr++] = &rmesa->tcl.tex[1];
+   }
+
+   /* Only flag the vtx state atom as changed when a format word differs
+    * from what is already stored.
+    */
+   if (vfmt0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0] ||
+       vfmt1 != rmesa->hw.vtx.cmd[VTX_VTXFMT_1]) { 
+      R200_STATECHANGE( rmesa, vtx ); 
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = vfmt0;
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = vfmt1;
+   } 
+
+   rmesa->tcl.nr_aos_components = nr;
+   rmesa->tcl.vertex_format = vfmt0;
+}
+
+
+void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+
+   if (R200_DEBUG & DEBUG_VERTS) 
+      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+
+   if (newinputs & VERT_BIT_POS) 
+     r200ReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
+
+   if (newinputs & VERT_BIT_NORMAL) 
+      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
+
+   if (newinputs & VERT_BIT_COLOR0) 
+      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
+
+   if (newinputs & VERT_BIT_COLOR1) 
+      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
+
+   if (newinputs & VERT_BIT_TEX0)
+      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.tex[0], __FUNCTION__ );
+
+   if (newinputs & VERT_BIT_TEX1)
+      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.tex[1], __FUNCTION__ );
+}
diff --git a/src/mesa/drivers/dri/r200/r200_maos_vbtmp.h b/src/mesa/drivers/dri/r200/r200_maos_vbtmp.h
new file mode 100644 (file)
index 0000000..a36be45
--- /dev/null
@@ -0,0 +1,378 @@
+/* $XFree86$ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/* LOCALVARS is expanded at the top of the generated emit function; it
+ * defaults to nothing when the including file does not define it.
+ */
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+/* The #undef makes the #ifndef below always true, so TCL_DEBUG is forced
+ * to 0 here regardless of any earlier definition.
+ */
+#undef TCL_DEBUG
+#ifndef TCL_DEBUG
+#define TCL_DEBUG 0
+#endif
+
+/* Template body: write vertices [start, end) from the tnl vertex buffer to
+ * 'dest' as interleaved dwords.
+ *
+ * Which attributes are written is selected by the DO_* macros supplied by
+ * the including file.  Per vertex the order is:
+ *   position x,y,z (+ w if DO_W), normal (DO_NORM), packed color (DO_RGBA),
+ *   one dword shared by specular rgb and fog (DO_SPEC || DO_FOG), then
+ *   s,t (+ element 3 if DO_PTEX) for each of DO_TEX0, DO_TEX1, DO_TEX2.
+ *
+ * Two copy loops follow: when VB->importable_data is set every source is
+ * walked with its own byte stride; otherwise the sources are indexed
+ * directly with the vertex number.
+ */
+static void TAG(emit)( GLcontext *ctx,
+                      GLuint start, GLuint end,
+                      void *dest )
+{
+   LOCALVARS
+      struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint (*tc0)[4], (*tc1)[4];
+   GLfloat *fog;
+   GLuint (*tc2)[4], (*norm)[3];
+   GLubyte (*col)[4], (*spec)[4];
+   GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
+   GLuint tc2_stride, norm_stride;
+   GLuint (*coord)[4];
+   GLuint coord_stride;
+   GLubyte dummy[4];           /* stand-in source for absent attributes */
+   int i;
+
+   union emit_union *v = (union emit_union *)dest;     /* output cursor */
+
+
+   if (R200_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s\n", __FUNCTION__); 
+
+   /* The vertex code expects Obj to be clean to element 3.  To fix
+    * this, add more vertex code (for obj-2, obj-3) or preferably move
+    * to maos.
+    */
+   if (VB->ObjPtr->size < 3) {
+      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
+        VB->import_data( ctx, VERT_OBJ, VEC_NOT_WRITEABLE );
+      }
+      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 2 );
+   }
+
+   if (DO_W && VB->ObjPtr->size < 4) {
+      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
+        VB->import_data( ctx, VERT_OBJ, VEC_NOT_WRITEABLE );
+      }
+      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 3 );
+   }
+
+   /* Read the position pointer only after the possible imports above. */
+   coord = (GLuint (*)[4])VB->ObjPtr->data;
+   coord_stride = VB->ObjPtr->stride;
+
+   /* NOTE(review): unlike the TEX1/TEX0 blocks below, this one has no
+    * fallback for a missing TexCoordPtr entry -- confirm callers
+    * guarantee it is set whenever DO_TEX2 is.
+    */
+   if (DO_TEX2) {
+      const GLuint t2 = GET_TEXSOURCE(2);
+      tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
+      tc2_stride = VB->TexCoordPtr[t2]->stride;
+      if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
+        if (VB->TexCoordPtr[t2]->flags & VEC_NOT_WRITEABLE) {
+           VB->import_data( ctx, VERT_TEX2, VEC_NOT_WRITEABLE );
+        }
+        _mesa_vector4f_clean_elem( VB->TexCoordPtr[t2], VB->Count, 3 );
+      }
+   }
+
+   /* NOTE(review): the presence test uses index 1 but the data is read
+    * from index t1 (same pattern for unit 0 below) -- confirm the two
+    * always agree.
+    */
+   if (DO_TEX1) {
+      if (VB->TexCoordPtr[1]) {
+        const GLuint t1 = GET_TEXSOURCE(1);
+        tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data;
+        tc1_stride = VB->TexCoordPtr[t1]->stride;
+        if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) {
+           if (VB->TexCoordPtr[t1]->flags & VEC_NOT_WRITEABLE) {
+              VB->import_data( ctx, VERT_TEX1, VEC_NOT_WRITEABLE );
+           }
+           _mesa_vector4f_clean_elem( VB->TexCoordPtr[t1], VB->Count, 3 );
+        }
+      } else {
+        tc1 = (GLuint (*)[4])&ctx->Current.Texcoord[1]; /* could be anything, really */
+        tc1_stride = 0;
+      }
+   }
+
+   if (DO_TEX0) {
+      if (VB->TexCoordPtr[0]) {
+        const GLuint t0 = GET_TEXSOURCE(0);
+        tc0_stride = VB->TexCoordPtr[t0]->stride;
+        tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data;
+        if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) {
+           if (VB->TexCoordPtr[t0]->flags & VEC_NOT_WRITEABLE) {
+              VB->import_data( ctx, VERT_TEX0, VEC_NOT_WRITEABLE );
+           }
+           _mesa_vector4f_clean_elem( VB->TexCoordPtr[t0], VB->Count, 3 );
+        }
+      } else {
+        tc0 = (GLuint (*)[4])&ctx->Current.Texcoord[0]; /* could be anything, really */
+        tc0_stride = 0;
+      }
+        
+   }
+
+   /* Without a normal array, repeat the current normal (stride 0). */
+   if (DO_NORM) {
+      if (VB->NormalPtr) {
+        norm_stride = VB->NormalPtr->stride;
+        norm = (GLuint (*)[3])VB->NormalPtr->data;
+      } else {
+        norm_stride = 0;
+        norm = (GLuint (*)[3])&ctx->Current.Normal;
+      }
+   }
+
+   if (DO_RGBA) {
+      if (VB->ColorPtr[0]) {
+        /* This is incorrect when colormaterial is enabled:
+         */
+        if (VB->ColorPtr[0]->Type != GL_UNSIGNED_BYTE) {
+           if (0) fprintf(stderr, "IMPORTING FLOAT COLORS\n");
+           IMPORT_FLOAT_COLORS( ctx );
+        }
+        col = (GLubyte (*)[4])VB->ColorPtr[0]->Ptr;
+        col_stride = VB->ColorPtr[0]->StrideB;
+      } else {
+        col = &dummy; /* any old memory is fine */
+        col_stride = 0;
+      }
+      
+   }
+
+   if (DO_SPEC) {
+      if (VB->SecondaryColorPtr[0]) {
+        if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+           IMPORT_FLOAT_SPEC_COLORS( ctx );
+        spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->Ptr;
+        spec_stride = VB->SecondaryColorPtr[0]->StrideB;
+      } else {
+        spec = &dummy;
+        spec_stride = 0;
+      }
+        
+   }
+
+   /* Without a fog array, read a zero written into 'dummy'. */
+   if (DO_FOG) {
+      if (VB->FogCoordPtr) {
+        fog = VB->FogCoordPtr->data;
+        fog_stride = VB->FogCoordPtr->stride;
+      } else {
+        fog = (GLfloat *)&dummy; *fog = 0;
+        fog_stride = 0;
+      }
+             
+   }
+   
+   
+   if (VB->importable_data) {
+      /* Strided path: every source pointer is first moved to vertex
+       * 'start' and then advanced by its own stride after each vertex.
+       */
+      if (start) {
+        coord =  (GLuint (*)[4])((GLubyte *)coord + start * coord_stride);
+        if (DO_TEX0)
+           tc0 =  (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+        if (DO_TEX1) 
+           tc1 =  (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+        if (DO_TEX2) 
+           tc2 =  (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+        if (DO_NORM) 
+           norm =  (GLuint (*)[3])((GLubyte *)norm + start * norm_stride);
+        if (DO_RGBA) 
+           STRIDE_4UB(col, start * col_stride);
+        if (DO_SPEC)
+           STRIDE_4UB(spec, start * spec_stride);
+        if (DO_FOG)
+           STRIDE_F(fog, start * fog_stride);
+      }
+
+      for (i=start; i < end; i++) {
+        v[0].ui = coord[0][0];
+        v[1].ui = coord[0][1];
+        v[2].ui = coord[0][2];
+        if (TCL_DEBUG) fprintf(stderr, "%d: %.2f %.2f %.2f ", i, v[0].f, v[1].f, v[2].f);
+        if (DO_W) {
+           v[3].ui = coord[0][3];
+           if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[3].f);
+           v += 4;
+        } 
+        else
+           v += 3;
+        coord =  (GLuint (*)[4])((GLubyte *)coord +  coord_stride);
+
+        if (DO_NORM) {
+           v[0].ui = norm[0][0];
+           v[1].ui = norm[0][1];
+           v[2].ui = norm[0][2];
+           if (TCL_DEBUG) fprintf(stderr, "norm: %.2f %.2f %.2f ", v[0].f, v[1].f, v[2].f);
+           v += 3;
+           norm =  (GLuint (*)[3])((GLubyte *)norm +  norm_stride);
+        }
+        if (DO_RGBA) {
+           v[0].ui = LE32_TO_CPU(*(GLuint *)&col[0]);
+           STRIDE_4UB(col, col_stride);
+           if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+           v++;
+        }
+        /* Specular rgb and fog share one dword: bytes 0-2 hold the
+         * specular color, byte 3 the fog value scaled by 255.  Bytes
+         * belonging to a disabled attribute are not written.
+         */
+        if (DO_SPEC || DO_FOG) {
+           if (DO_SPEC) {
+              v[0].ub[0] = spec[0][0];
+              v[0].ub[1] = spec[0][1];
+              v[0].ub[2] = spec[0][2];
+              STRIDE_4UB(spec, spec_stride);
+           }
+           if (DO_FOG) {
+              v[0].ub[3] = fog[0] * 255.0;
+              STRIDE_F(fog, fog_stride);
+           }
+           if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+           v++;
+        }
+        /* Projective texturing emits element 3 as the third
+         * coordinate; element 2 is skipped.
+         */
+        if (DO_TEX0) {
+           v[0].ui = tc0[0][0];
+           v[1].ui = tc0[0][1];
+           if (TCL_DEBUG) fprintf(stderr, "t0: %.2f %.2f ", v[0].f, v[1].f);
+           if (DO_PTEX) {
+              v[2].ui = tc0[0][3];
+              if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+              v += 3;
+           } 
+           else
+              v += 2;
+           tc0 =  (GLuint (*)[4])((GLubyte *)tc0 +  tc0_stride);
+        }
+        if (DO_TEX1) {
+           v[0].ui = tc1[0][0];
+           v[1].ui = tc1[0][1];
+           if (TCL_DEBUG) fprintf(stderr, "t1: %.2f %.2f ", v[0].f, v[1].f);
+           if (DO_PTEX) {
+              v[2].ui = tc1[0][3];
+              if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+              v += 3;
+           } 
+           else
+              v += 2;
+           tc1 =  (GLuint (*)[4])((GLubyte *)tc1 +  tc1_stride);
+        } 
+        if (DO_TEX2) {
+           v[0].ui = tc2[0][0];
+           v[1].ui = tc2[0][1];
+           if (DO_PTEX) {
+              v[2].ui = tc2[0][3];
+              v += 3;
+           } 
+           else
+              v += 2;
+           tc2 =  (GLuint (*)[4])((GLubyte *)tc2 +  tc2_stride);
+        } 
+        if (TCL_DEBUG) fprintf(stderr, "\n");
+      }
+   } else {
+      /* Direct path: same output layout, but every source is indexed
+       * with the vertex number and the stride variables are not used.
+       * Presumably all arrays are tightly packed here -- TODO confirm.
+       */
+      for (i=start; i < end; i++) {
+        v[0].ui = coord[i][0];
+        v[1].ui = coord[i][1];
+        v[2].ui = coord[i][2];
+        if (DO_W) {
+           v[3].ui = coord[i][3];
+           v += 4;
+        } 
+        else
+           v += 3;
+
+        if (DO_NORM) {
+           v[0].ui = norm[i][0];
+           v[1].ui = norm[i][1];
+           v[2].ui = norm[i][2];
+           v += 3;
+        }
+        if (DO_RGBA) {
+           v[0].ui = LE32_TO_CPU(*(GLuint *)&col[i]);
+           v++;
+        }
+        if (DO_SPEC || DO_FOG) {
+           if (DO_SPEC) {
+              v[0].ub[0] = spec[i][0];
+              v[0].ub[1] = spec[i][1];
+              v[0].ub[2] = spec[i][2];
+           }
+           if (DO_FOG) {
+              v[0].ub[3] = fog[i] * 255.0;
+           }
+           v++;
+        }
+        if (DO_TEX0) {
+           v[0].ui = tc0[i][0];
+           v[1].ui = tc0[i][1];
+           if (DO_PTEX) {
+              v[2].ui = tc0[i][3];
+              v += 3;
+           } 
+           else
+              v += 2;
+        }
+        if (DO_TEX1) {
+           v[0].ui = tc1[i][0];
+           v[1].ui = tc1[i][1];
+           if (DO_PTEX) {
+              v[2].ui = tc1[i][3];
+              v += 3;
+           } 
+           else
+              v += 2;
+        } 
+        if (DO_TEX2) {
+           v[0].ui = tc2[i][0];
+           v[1].ui = tc2[i][1];
+           if (DO_PTEX) {
+              v[2].ui = tc2[i][3];
+              v += 3;
+           } 
+           else
+              v += 2;
+        } 
+      }
+   }
+}
+
+
+
+/* Template body: register this variant's emit function, vertex format and
+ * vertex size (in dwords) in slot IDX of setup_tab.
+ */
+static void TAG(init)( void )
+{
+   /* Each texture unit contributes s,t plus a third value when DO_PTEX. */
+   const int tex_sz = (DO_PTEX) ? 3 : 2;
+   int sz = (DO_W) ? 4 : 3;                    /* x,y,z and optional w */
+
+   if (DO_NORM)
+      sz += 3;
+   if (DO_RGBA)
+      sz += 1;
+   if (DO_SPEC || DO_FOG)                      /* the two share one dword */
+      sz += 1;
+   if (DO_TEX0)
+      sz += tex_sz;
+   if (DO_TEX1)
+      sz += tex_sz;
+   if (DO_TEX2)
+      sz += tex_sz;
+
+   setup_tab[IDX].vertex_size = sz;
+   setup_tab[IDX].vertex_format = IND;
+   setup_tab[IDX].emit = TAG(emit);
+}
+
+
+/* Clear the per-variant template parameters so the including file can
+ * define them afresh before including this template again.
+ */
+#undef IND
+#undef TAG
+#undef IDX
diff --git a/src/mesa/drivers/dri/r200/r200_maos_verts.c b/src/mesa/drivers/dri/r200/r200_maos_verts.c
new file mode 100644 (file)
index 0000000..cd866f6
--- /dev/null
@@ -0,0 +1,340 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "mmath.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_imm_debug.h"
+
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_tex.h"
+#include "r200_tcl.h"
+#include "r200_swtcl.h"
+#include "r200_maos.h"
+
+
+/* Number of vertex layouts instantiated from r200_maos_vbtmp.h below
+ * (IDX 0 .. 12).
+ */
+#define R200_TCL_MAX_SETUP 13
+
+/* One 32-bit vertex slot, viewable as a float, a raw dword or four bytes. */
+union emit_union { float f; GLuint ui; GLubyte ub[4]; };
+
+/* Per-layout table filled in by the generated init_*() functions:
+ *   emit          - generated function that writes the vertices
+ *   vertex_size   - size of one vertex, counted in 32-bit slots
+ *   vertex_format - the R200_CP_VC_FRMT_* bits (IND) of the layout
+ */
+static struct {
+   void   (*emit)( GLcontext *, GLuint, GLuint, void * );
+   GLuint vertex_size;
+   GLuint vertex_format;
+} setup_tab[R200_TCL_MAX_SETUP];
+
+/* Feature tests used by the template; each one inspects the IND value of
+ * the layout currently being instantiated.  Note that DO_FOG tests the
+ * same bit as DO_SPEC, and DO_PTEX looks at Q0 only.
+ */
+#define DO_W    (IND & R200_CP_VC_FRMT_W0)
+#define DO_RGBA (IND & R200_CP_VC_FRMT_PKCOLOR)
+#define DO_SPEC (IND & R200_CP_VC_FRMT_PKSPEC)
+#define DO_FOG  (IND & R200_CP_VC_FRMT_PKSPEC)
+#define DO_TEX0 (IND & R200_CP_VC_FRMT_ST0)
+#define DO_TEX1 (IND & R200_CP_VC_FRMT_ST1)
+#define DO_PTEX (IND & R200_CP_VC_FRMT_Q0)
+#define DO_NORM (IND & R200_CP_VC_FRMT_N0)
+
+/* Texture units 2 and 3 are never emitted by any layout in this file. */
+#define DO_TEX2 0
+#define DO_TEX3 0
+
+/* Hooks consumed by the template (presumably for texture-unit mapping and
+ * colour import -- their uses are in r200_maos_vbtmp.h, verify there).
+ */
+#define GET_TEXSOURCE(n)  n
+#define GET_UBYTE_COLOR_STORE() &R200_CONTEXT(ctx)->UbyteColor
+#define GET_UBYTE_SPEC_COLOR_STORE() &R200_CONTEXT(ctx)->UbyteSecondaryColor
+
+#define IMPORT_FLOAT_COLORS r200_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS r200_import_float_spec_colors
+
+/***********************************************************************
+ *             Generate vertex emit functions               *
+ ***********************************************************************/
+
+
+/* Each IDX/IND/TAG triple below instantiates r200_maos_vbtmp.h once,
+ * producing emit_<tag>() and init_<tag>() for one vertex layout; the
+ * template #undefs IND, TAG and IDX again at its end.
+ *
+ * r200EmitArrays() scans setup_tab[] from index 0 and takes the first
+ * layout whose format contains every required bit, so earlier entries
+ * win.  Entries 0..9 are roughly in order of increasing vertex size; the
+ * projective-texture layouts 10 and 11 are smaller than entry 9, and
+ * entry 12 contains every bit r200EmitArrays() can request, so the scan
+ * always terminates inside the table.
+ */
+#define IDX 0
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR)
+#define TAG(x) x##_rgba
+#include "r200_maos_vbtmp.h"
+
+#define IDX 1
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_N0)
+#define TAG(x) x##_n
+#include "r200_maos_vbtmp.h"
+
+#define IDX 2
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_ST0)
+#define TAG(x) x##_rgba_st
+#include "r200_maos_vbtmp.h"
+
+#define IDX 3
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_n
+#include "r200_maos_vbtmp.h"
+
+#define IDX 4
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_n
+#include "r200_maos_vbtmp.h"
+
+#define IDX 5
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_st_st
+#include "r200_maos_vbtmp.h"
+
+#define IDX 6
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_st_n
+#include "r200_maos_vbtmp.h"
+
+#define IDX 7
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_PKSPEC|            \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_spec_st_st
+#include "r200_maos_vbtmp.h"
+
+#define IDX 8
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_ST1|               \
+            R200_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_st_n
+#include "r200_maos_vbtmp.h"
+
+/* NOTE: the tag below is spelled "rgpa" (sic).  init_tcl_verts() calls
+ * the generated function by exactly this name, so the two must change
+ * together if the spelling is ever corrected.
+ */
+#define IDX 9
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_PKSPEC|            \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_ST1|               \
+            R200_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgpa_spec_st_st_n
+#include "r200_maos_vbtmp.h"
+
+#define IDX 10
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq
+#include "r200_maos_vbtmp.h"
+
+#define IDX 11
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_ST1|               \
+            R200_CP_VC_FRMT_Q1|                \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq_stq
+#include "r200_maos_vbtmp.h"
+
+/* Catch-all layout (same "rgpa" spelling as entry 9, see note there). */
+#define IDX 12
+#define IND (R200_CP_VC_FRMT_XY|               \
+            R200_CP_VC_FRMT_Z|         \
+            R200_CP_VC_FRMT_W0|                \
+            R200_CP_VC_FRMT_PKCOLOR|           \
+            R200_CP_VC_FRMT_PKSPEC|            \
+            R200_CP_VC_FRMT_ST0|               \
+            R200_CP_VC_FRMT_Q0|                \
+            R200_CP_VC_FRMT_ST1|               \
+            R200_CP_VC_FRMT_Q1|                \
+            R200_CP_VC_FRMT_N0)
+#define TAG(x) x##_w_rgpa_spec_stq_stq_n
+#include "r200_maos_vbtmp.h"
+
+
+
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+
+/* Populate every slot of setup_tab[] by running the init function that
+ * each template instantiation generated.  Every call writes only its own
+ * slot, so the calls are listed here in table-index order.
+ */
+static void init_tcl_verts( void )
+{
+   init_rgba();                        /* setup_tab[0]  */
+   init_n();                           /* setup_tab[1]  */
+   init_rgba_st();                     /* setup_tab[2]  */
+   init_rgba_n();                      /* setup_tab[3]  */
+   init_st_n();                        /* setup_tab[4]  */
+   init_rgba_st_st();                  /* setup_tab[5]  */
+   init_rgba_st_n();                   /* setup_tab[6]  */
+   init_rgba_spec_st_st();             /* setup_tab[7]  */
+   init_st_st_n();                     /* setup_tab[8]  */
+   init_rgpa_spec_st_st_n();           /* setup_tab[9]  */
+   init_rgba_stq();                    /* setup_tab[10] */
+   init_rgba_stq_stq();                /* setup_tab[11] */
+   init_w_rgpa_spec_stq_stq_n();       /* setup_tab[12] */
+}
+
+
+/* Emit the current TNL vertex buffer as one interleaved vertex array.
+ *
+ * 'inputs' is the set of VERT_BIT_* attributes wanted.  The required
+ * R200_CP_VC_FRMT_* bits are derived from it, the first setup_tab[] layout
+ * containing all of them is chosen, and its emit function writes
+ * VB->Count vertices into a freshly allocated DMA region.  Nothing is
+ * re-emitted when a region in the chosen format is already held.
+ */
+void r200EmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   /* TCL output format with both Q bits cleared; they are re-added below
+    * for each texture unit that supplies four components.
+    */
+   GLuint out_fmt = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+                     ~(R200_TCL_VTX_Q0|R200_TCL_VTX_Q1));
+   GLuint need = R200_CP_VC_FRMT_Z;
+   static int tables_ready = 0;
+   int slot = 0;
+
+   /* Lazily build the layout table on first use. */
+   if (!tables_ready) {
+      init_tcl_verts();
+      tables_ready = 1;
+   }
+
+   if (VB->ObjPtr->size == 4)
+      need |= R200_CP_VC_FRMT_W0;
+
+   if (inputs & VERT_BIT_NORMAL)
+      need |= R200_CP_VC_FRMT_N0;
+
+   if (inputs & VERT_BIT_COLOR0)
+      need |= R200_CP_VC_FRMT_PKCOLOR;
+
+   if (inputs & VERT_BIT_COLOR1)
+      need |= R200_CP_VC_FRMT_PKSPEC;
+
+   if (inputs & VERT_BIT_TEX0) {
+      need |= R200_CP_VC_FRMT_ST0;
+      if (VB->TexCoordPtr[0]->size == 4) {
+         need |= R200_CP_VC_FRMT_Q0;
+         out_fmt |= R200_TCL_VTX_Q0;
+      }
+   }
+
+   if (inputs & VERT_BIT_TEX1) {
+      need |= R200_CP_VC_FRMT_ST1;
+      if (VB->TexCoordPtr[1]->size == 4) {
+         need |= R200_CP_VC_FRMT_Q1;
+         out_fmt |= R200_TCL_VTX_Q1;
+      }
+   }
+
+   /* Only touch the tcl state atom when the output format really changes. */
+   if (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] != out_fmt) {
+      R200_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = out_fmt;
+   }
+
+   /* First layout that is a superset of the required bits. */
+   while (slot < R200_TCL_MAX_SETUP &&
+          (setup_tab[slot].vertex_format & need) != need)
+      slot++;
+
+   /* A region in this format is already held: nothing to do. */
+   if (rmesa->tcl.indexed_verts.buf &&
+       rmesa->tcl.vertex_format == setup_tab[slot].vertex_format)
+      return;
+
+   if (rmesa->tcl.indexed_verts.buf)
+      r200ReleaseArrays( ctx, ~0 );
+
+   r200AllocDmaRegionVerts( rmesa,
+                            &rmesa->tcl.indexed_verts,
+                            VB->Count,
+                            setup_tab[slot].vertex_size * 4,
+                            4);
+
+   setup_tab[slot].emit( ctx, 0, VB->Count,
+                         rmesa->tcl.indexed_verts.address +
+                         rmesa->tcl.indexed_verts.start );
+
+   rmesa->tcl.vertex_format = setup_tab[slot].vertex_format;
+   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
+   rmesa->tcl.indexed_verts.aos_size = setup_tab[slot].vertex_size;
+   rmesa->tcl.indexed_verts.aos_stride = setup_tab[slot].vertex_size;
+
+   /* The interleaved region is the single array-of-structures component. */
+   rmesa->tcl.nr_aos_components = 1;
+   rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
+}
+
+
+
+/* Drop the interleaved vertex region when any input has changed.
+ * A zero 'newinputs' leaves the region in place.
+ */
+void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+
+   if (R200_DEBUG & DEBUG_VERTS) {
+      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+   }
+
+   if (!newinputs)
+      return;
+
+   r200ReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
+}
diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c
new file mode 100644 (file)
index 0000000..94977ef
--- /dev/null
@@ -0,0 +1,494 @@
+/* $XFree86$ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "enums.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "texutil.h"
+#include "swrast/swrast.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_pixel.h"
+#include "r200_swtcl.h"
+
+
+
+/* Decide whether a pixel rectangle can go through the blitter untouched.
+ *
+ * Returns GL_TRUE only for GL_BGRA / GL_UNSIGNED_INT_8_8_8_8_REV data on a
+ * 4-byte-per-pixel screen, with a pitch that is a multiple of 64, no image
+ * transfer operations and no byte swapping or LSB-first packing.
+ * 'pixels' and 'sz' are accepted but not examined.
+ */
+static GLboolean
+check_color( const GLcontext *ctx, GLenum type, GLenum format,
+            const struct gl_pixelstore_attrib *packing,
+            const void *pixels, GLint sz, GLint pitch )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint cpp = rmesa->r200Screen->cpp;
+   int needs_conversion;
+
+   if (R200_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Anything that would require the data to be touched on the way. */
+   needs_conversion = ((pitch & 63) != 0 ||
+                       ctx->_ImageTransferState ||
+                       packing->SwapBytes ||
+                       packing->LsbFirst);
+
+   if (needs_conversion) {
+      if (R200_DEBUG & DEBUG_PIXEL)
+         fprintf(stderr, "%s: failed 1\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (cpp != 4 ||
+       format != GL_BGRA ||
+       type != GL_UNSIGNED_INT_8_8_8_8_REV) {
+      if (R200_DEBUG & DEBUG_PIXEL)
+         fprintf(stderr, "%s: failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (R200_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s: passed 2\n", __FUNCTION__);
+
+   return GL_TRUE;
+}
+
+/* GL_TRUE when no per-fragment operation is active that a plain blit
+ * would bypass, and the current raster position is valid.
+ */
+static GLboolean
+check_color_per_fragment_ops( const GLcontext *ctx )
+{
+   /* Enabled tests and effects that a blit would skip. */
+   if (ctx->Color.AlphaEnabled ||
+       ctx->Depth.Test ||
+       ctx->Fog.Enabled ||
+       ctx->Scissor.Enabled ||
+       ctx->Stencil.Enabled ||
+       ctx->Color.ColorLogicOpEnabled ||
+       ctx->Texture._EnabledUnits ||
+       ctx->Depth.OcclusionTest)
+      return GL_FALSE;
+
+   /* Every colour channel must be writable. */
+   if (!ctx->Color.ColorMask[0] ||
+       !ctx->Color.ColorMask[1] ||
+       !ctx->Color.ColorMask[2] ||
+       !ctx->Color.ColorMask[3])
+      return GL_FALSE;
+
+   return ctx->Current.RasterPosValid ? GL_TRUE : GL_FALSE;
+}
+
+
+
+/* Clip the rectangle (*x, *y, *width, *height) in place against the
+ * _Xmin/_Xmax/_Ymin/_Ymax bounds of 'buffer'.
+ *
+ * Returns GL_FALSE as soon as the rectangle is found to be empty
+ * (the values clipped so far stay modified).  Otherwise returns GL_TRUE
+ * and stores in *size the byte offset of the last pixel, computed from
+ * the screen's frontPitch and cpp.
+ *
+ * NOTE(review): the "+ 1" in the right/top clipping treats _Xmax/_Ymax as
+ * the last valid coordinate -- confirm against the framebuffer definition.
+ */
+static GLboolean
+clip_pixelrect( const GLcontext *ctx,
+               const GLframebuffer *buffer,
+               GLint *x, GLint *y,
+               GLsizei *width, GLsizei *height,
+               GLint *size )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   /* Horizontal: pull the left edge in, then trim the right edge. */
+   if (*x < buffer->_Xmin) {
+      *width += *x - buffer->_Xmin;
+      *x = buffer->_Xmin;
+   }
+   if (*x + *width > buffer->_Xmax) {
+      *width = buffer->_Xmax + 1 - *x;
+   }
+   if (*width <= 0) {
+      return GL_FALSE;
+   }
+
+   /* Vertical: pull the bottom edge in, then trim the top edge. */
+   if (*y < buffer->_Ymin) {
+      *height += *y - buffer->_Ymin;
+      *y = buffer->_Ymin;
+   }
+   if (*y + *height > buffer->_Ymax) {
+      *height = buffer->_Ymax + 1 - *y;
+   }
+   if (*height <= 0) {
+      return GL_FALSE;
+   }
+
+   *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch +
+            (*x + *width - 1) * rmesa->r200Screen->cpp);
+
+   return GL_TRUE;
+}
+
+/* Try to satisfy a ReadPixels request with hardware blits.
+ *
+ * Succeeds only when the destination lies in AGP memory, the pack state
+ * has Invert set (the blitter produces upside-down rows), check_color()
+ * accepts the format and the screen is 4 bytes per pixel.  The request is
+ * clipped against the read buffer and then against every cliprect of the
+ * drawable, one blit being emitted per visible piece.
+ *
+ * Returns GL_TRUE when the request was handled (including the case where
+ * it was clipped away completely), GL_FALSE when the caller has to fall
+ * back to software.
+ */
+static GLboolean
+r200TryReadPixels( GLcontext *ctx,
+                 GLint x, GLint y, GLsizei width, GLsizei height,
+                 GLenum format, GLenum type,
+                 const struct gl_pixelstore_attrib *pack,
+                 GLvoid *pixels )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint pitch = pack->RowLength ? pack->RowLength : width;
+   /* Bytes covered by the destination image.  Initialised here so that
+    * the value handed to check_color() is never indeterminate;
+    * clip_pixelrect() overwrites it later.
+    */
+   GLint size = pitch * height * rmesa->r200Screen->cpp;
+   GLint blit_format;
+
+   if (R200_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Only accelerate reading to agp buffers.
+    */
+   if ( !r200IsAgpMemory(rmesa, pixels, size) ) {
+      if (R200_DEBUG & DEBUG_PIXEL)
+         fprintf(stderr, "%s: dest not agp\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Need GL_PACK_INVERT_MESA to cope with upside-down results from
+    * the blitter:
+    */
+   if (!pack->Invert) {
+      if (R200_DEBUG & DEBUG_PIXEL)
+         fprintf(stderr, "%s: MESA_PACK_INVERT not set\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (!check_color(ctx, type, format, pack, pixels, size, pitch))
+      return GL_FALSE;
+
+   switch ( rmesa->r200Screen->cpp ) {
+   case 4:
+      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+      break;
+   default:
+      return GL_FALSE;
+   }
+
+
+   /* Although the blits go on the command buffer, need to do this and
+    * fire with lock held to guarantee cliprects and drawOffset are
+    * correct.
+    *
+    * This is an unusual situation however, as the code which flushes
+    * a full command buffer expects to be called unlocked.  As a
+    * workaround, immediately flush the buffer on acquiring the lock.
+    */
+   LOCK_HARDWARE( rmesa );
+
+   if (rmesa->store.cmd_used)
+      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+
+   if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
+                      &size)) {
+      UNLOCK_HARDWARE( rmesa );
+      if (R200_DEBUG & DEBUG_PIXEL)
+         fprintf(stderr, "%s totally clipped -- nothing to do\n",
+                 __FUNCTION__);
+      return GL_TRUE;
+   }
+
+   {
+      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+      int nbox = dPriv->numClipRects;
+      int src_offset = rmesa->state.color.drawOffset;
+      int src_pitch = rmesa->state.color.drawPitch * rmesa->r200Screen->cpp;
+      int dst_offset = r200AgpOffsetFromVirtual( rmesa, pixels);
+      int dst_pitch = pitch * rmesa->r200Screen->cpp;
+      XF86DRIClipRectRec *box = dPriv->pClipRects;
+      int i;
+
+      r200EmitWait( rmesa, RADEON_WAIT_3D );
+
+      /* Flip y within the drawable, then offset by the drawable origin. */
+      y = dPriv->h - y - height;
+      x += dPriv->x;
+      y += dPriv->y;
+
+
+      if (R200_DEBUG & DEBUG_PIXEL)
+         fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n",
+                 src_pitch, dst_pitch);
+
+      for (i = 0 ; i < nbox ; i++)
+      {
+         GLint bx = box[i].x1;
+         GLint by = box[i].y1;
+         GLint bw = box[i].x2 - bx;
+         GLint bh = box[i].y2 - by;
+
+         /* Intersect the cliprect with the requested rectangle. */
+         if (bx < x) bw -= x - bx, bx = x;
+         if (by < y) bh -= y - by, by = y;
+         if (bx + bw > x + width) bw = x + width - bx;
+         if (by + bh > y + height) bh = y + height - by;
+         if (bw <= 0) continue;
+         if (bh <= 0) continue;
+
+         r200EmitBlit( rmesa,
+                       blit_format,
+                       src_pitch, src_offset,
+                       dst_pitch, dst_offset,
+                       bx, by,
+                       bx - x, by - y,
+                       bw, bh );
+      }
+
+      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+   }
+   UNLOCK_HARDWARE( rmesa );
+
+   r200Finish( ctx ); /* required by GL */
+
+   return GL_TRUE;
+}
+
+/* Driver ReadPixels hook: use the blit path when it applies, otherwise
+ * hand the request to the software rasterizer.
+ */
+static void
+r200ReadPixels( GLcontext *ctx,
+                GLint x, GLint y, GLsizei width, GLsizei height,
+                GLenum format, GLenum type,
+                const struct gl_pixelstore_attrib *pack,
+                GLvoid *pixels )
+{
+   if (R200_DEBUG & DEBUG_PIXEL) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   if (r200TryReadPixels( ctx, x, y, width, height, format, type, pack,
+                          pixels)) {
+      return;
+   }
+
+   _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack,
+                       pixels);
+}
+
+
+
+
+/* Blit a pixel rectangle from AGP memory into the current draw buffer.
+ *
+ * 'pitch' is the source row length in pixels; 'pixels' is converted to an
+ * AGP offset with r200AgpOffsetFromVirtual().  The rectangle is clipped
+ * against the draw buffer and then against each cliprect of the drawable,
+ * with one blit emitted per visible piece.  Returns without drawing when
+ * the screen is neither 2 nor 4 bytes per pixel or the rectangle is
+ * clipped away.  'dest' and 'planemask' are not referenced in the body.
+ */
+static void do_draw_pix( GLcontext *ctx,
+                        GLint x, GLint y, GLsizei width, GLsizei height,
+                        GLint pitch,
+                        const void *pixels,
+                        GLuint dest, GLuint planemask)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   XF86DRIClipRectPtr box = dPriv->pClipRects;
+   int nbox = dPriv->numClipRects;
+   int i;
+   int blit_format;
+   int size;
+   int src_offset = r200AgpOffsetFromVirtual( rmesa, pixels);
+   int src_pitch = pitch * rmesa->r200Screen->cpp;
+
+   if (R200_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   switch ( rmesa->r200Screen->cpp ) {
+   case 2:
+      blit_format = R200_CP_COLOR_FORMAT_RGB565;
+      break;
+   case 4:
+      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+      break;
+   default:
+      return;
+   }
+
+
+   /* Cliprects and drawOffset are read below, so the lock is held from
+    * here on; anything already queued is flushed first.
+    */
+   LOCK_HARDWARE( rmesa );
+
+   if (rmesa->store.cmd_used)
+      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+
+   /* The only caller visible in this file requires Pixel.ZoomY == -1, so
+    * the image extends downwards from the given y.
+    */
+   y -= height;                        /* cope with pixel zoom */
+   
+   if (!clip_pixelrect(ctx, ctx->DrawBuffer,
+                      &x, &y, &width, &height,
+                      &size)) {
+      UNLOCK_HARDWARE( rmesa );
+      return;
+   }
+
+   y = dPriv->h - y - height;  /* convert from gl to hardware coords */
+   x += dPriv->x;
+   y += dPriv->y;
+
+
+   r200EmitWait( rmesa, RADEON_WAIT_3D );
+
+   for (i = 0 ; i < nbox ; i++ )
+   {
+      GLint bx = box[i].x1;
+      GLint by = box[i].y1;
+      GLint bw = box[i].x2 - bx;
+      GLint bh = box[i].y2 - by;
+
+      /* Intersect this cliprect with the (clipped) target rectangle. */
+      if (bx < x) bw -= x - bx, bx = x;
+      if (by < y) bh -= y - by, by = y;
+      if (bx + bw > x + width) bw = x + width - bx;
+      if (by + bh > y + height) bh = y + height - by;
+      if (bw <= 0) continue;
+      if (bh <= 0) continue;
+
+      /* Source coordinates are relative to the rectangle origin,
+       * destination coordinates are absolute.
+       */
+      r200EmitBlit( rmesa,
+                   blit_format,
+                   src_pitch, src_offset,
+                   rmesa->state.color.drawPitch * rmesa->r200Screen->cpp,
+                   rmesa->state.color.drawOffset,
+                   bx - x, by - y,
+                   bx, by,
+                   bw, bh );
+   }
+
+   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+   r200WaitForIdleLocked( rmesa ); /* required by GL */
+   UNLOCK_HARDWARE( rmesa );
+}
+
+
+
+
+/* Try to satisfy a DrawPixels request with hardware blits.
+ *
+ * The fast path is taken only for GL_RGB/GL_RGBA/GL_BGRA data that lives
+ * in AGP memory, passes check_color() and
+ * check_color_per_fragment_ops(), is drawn with every colour plane
+ * enabled, and uses a pixel zoom of exactly (1, -1).
+ *
+ * Returns GL_TRUE when the pixels were drawn by do_draw_pix(), GL_FALSE
+ * when the caller has to fall back to software.
+ */
+static GLboolean
+r200TryDrawPixels( GLcontext *ctx,
+                 GLint x, GLint y, GLsizei width, GLsizei height,
+                 GLenum format, GLenum type,
+                 const struct gl_pixelstore_attrib *unpack,
+                 const GLvoid *pixels )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint pitch = unpack->RowLength ? unpack->RowLength : width;
+   GLuint dest, planemask;
+   GLuint cpp = rmesa->r200Screen->cpp;
+   /* Bytes covered by the source image: 'height' rows of 'pitch' pixels.
+    * (The rows are counted by height, not width, matching the read path.)
+    */
+   GLint size = height * pitch * cpp;
+
+   if (R200_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   switch (format) {
+   case GL_RGB:
+   case GL_RGBA:
+   case GL_BGRA:
+      dest = rmesa->state.color.drawOffset;
+
+      planemask = r200PackColor(cpp,
+                                ctx->Color.ColorMask[RCOMP],
+                                ctx->Color.ColorMask[GCOMP],
+                                ctx->Color.ColorMask[BCOMP],
+                                ctx->Color.ColorMask[ACOMP]);
+
+      /* Replicate a 16-bit mask into both halves of the dword. */
+      if (cpp == 2)
+         planemask |= planemask << 16;
+
+      if (planemask != ~0)
+         return GL_FALSE;      /* fix me -- should be possible */
+
+      /* Can't do conversions on agp reads/draws.
+       */
+      if ( !r200IsAgpMemory( rmesa, pixels, size ) ) {
+         if (R200_DEBUG & DEBUG_PIXEL)
+            fprintf(stderr, "%s: not agp memory\n", __FUNCTION__);
+         return GL_FALSE;
+      }
+
+      if (!check_color(ctx, type, format, unpack, pixels, size, pitch)) {
+         return GL_FALSE;
+      }
+      if (!check_color_per_fragment_ops(ctx)) {
+         return GL_FALSE;
+      }
+
+      if (ctx->Pixel.ZoomX != 1.0F ||
+          ctx->Pixel.ZoomY != -1.0F)
+         return GL_FALSE;
+      break;
+
+   default:
+      return GL_FALSE;
+   }
+
+   /* Reaching this point means the source was already verified to be AGP
+    * memory above.  Uploading from regular memory through dma buffers is
+    * not implemented.
+    */
+   do_draw_pix( ctx, x, y, width, height, pitch, pixels,
+                dest, planemask );
+   return GL_TRUE;
+}
+
+/* Driver DrawPixels hook: use the blit path when it applies, otherwise
+ * hand the request to the software rasterizer.
+ */
+static void
+r200DrawPixels( GLcontext *ctx,
+                GLint x, GLint y, GLsizei width, GLsizei height,
+                GLenum format, GLenum type,
+                const struct gl_pixelstore_attrib *unpack,
+                const GLvoid *pixels )
+{
+   if (R200_DEBUG & DEBUG_PIXEL) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   if (r200TryDrawPixels( ctx, x, y, width, height, format, type,
+                          unpack, pixels )) {
+      return;
+   }
+
+   _swrast_DrawPixels( ctx, x, y, width, height, format, type,
+                       unpack, pixels );
+}
+
+
+/* Driver Bitmap hook: draw through r200PointsBitmap() unless the context
+ * is in a fallback state, in which case swrast does the work.
+ */
+static void
+r200Bitmap( GLcontext *ctx, GLint px, GLint py,
+                 GLsizei width, GLsizei height,
+                 const struct gl_pixelstore_attrib *unpack,
+                 const GLubyte *bitmap )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (!rmesa->Fallback) {
+      r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap );
+      return;
+   }
+
+   _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap );
+}
+
+
+
+/* Install the pixel-path driver hooks.
+ *
+ * Accum and CopyPixels always go to swrast.  ReadPixels and DrawPixels
+ * use the blit-accelerated versions unless R200_NO_BLITS is set in the
+ * environment or the DRM minor version is below 6.  Bitmap uses the
+ * hardware version only when blits are enabled and R200_HW_BITMAP is set.
+ */
+void r200InitPixelFuncs( GLcontext *ctx )
+{
+   ctx->Driver.Accum = _swrast_Accum;
+   ctx->Driver.CopyPixels = _swrast_CopyPixels;
+
+   if (getenv("R200_NO_BLITS") || R200_CONTEXT(ctx)->dri.drmMinor < 6) {
+      /* Blits unavailable or disabled: everything stays in software. */
+      ctx->Driver.Bitmap = _swrast_Bitmap;
+      ctx->Driver.DrawPixels = _swrast_DrawPixels;
+      ctx->Driver.ReadPixels = _swrast_ReadPixels;
+      return;
+   }
+
+   ctx->Driver.ReadPixels = r200ReadPixels;
+   ctx->Driver.DrawPixels = r200DrawPixels;
+
+   if (getenv("R200_HW_BITMAP")) {
+      ctx->Driver.Bitmap = r200Bitmap;
+   }
+   else {
+      ctx->Driver.Bitmap = _swrast_Bitmap;
+   }
+}
diff --git a/src/mesa/drivers/dri/r200/r200_pixel.h b/src/mesa/drivers/dri/r200/r200_pixel.h
new file mode 100644 (file)
index 0000000..8e7aca1
--- /dev/null
@@ -0,0 +1,43 @@
+/* $XFree86$ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_PIXEL_H__
+#define __R200_PIXEL_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+/* Installs the pixel-path entries of ctx->Driver. */
+extern void r200InitPixelFuncs( GLcontext *ctx );
+
+#endif /* GLX_DIRECT_RENDERING */
+#endif /* __R200_PIXEL_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h
new file mode 100644 (file)
index 0000000..cc0ae50
--- /dev/null
@@ -0,0 +1,1444 @@
+/* $XFree86$ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef _R200_REG_H_
+#define _R200_REG_H_
+
+#define R200_PP_MISC                      0x1c14 
+#define     R200_REF_ALPHA_MASK        0x000000ff
+#define     R200_ALPHA_TEST_FAIL       (0 << 8)
+#define     R200_ALPHA_TEST_LESS       (1 << 8)
+#define     R200_ALPHA_TEST_LEQUAL     (2 << 8)
+#define     R200_ALPHA_TEST_EQUAL      (3 << 8)
+#define     R200_ALPHA_TEST_GEQUAL     (4 << 8)
+#define     R200_ALPHA_TEST_GREATER    (5 << 8)
+#define     R200_ALPHA_TEST_NEQUAL     (6 << 8)
+#define     R200_ALPHA_TEST_PASS       (7 << 8)
+#define     R200_ALPHA_TEST_OP_MASK    (7 << 8)
+#define     R200_CHROMA_FUNC_FAIL      (0 << 16)
+#define     R200_CHROMA_FUNC_PASS      (1 << 16)
+#define     R200_CHROMA_FUNC_NEQUAL    (2 << 16)
+#define     R200_CHROMA_FUNC_EQUAL     (3 << 16)
+#define     R200_CHROMA_KEY_NEAREST    (0 << 18)
+#define     R200_CHROMA_KEY_ZERO       (1 << 18)
+#define     R200_RIGHT_HAND_CUBE_D3D   (0 << 24)
+#define     R200_RIGHT_HAND_CUBE_OGL   (1 << 24)
+#define R200_PP_FOG_COLOR                 0x1c18 
+#define     R200_FOG_COLOR_MASK        0x00ffffff
+#define     R200_FOG_VERTEX            (0 << 24)
+#define     R200_FOG_TABLE             (1 << 24)
+#define     R200_FOG_USE_DEPTH         (0 << 25)
+#define     R200_FOG_USE_W             (1 << 25)
+#define     R200_FOG_USE_DIFFUSE_ALPHA (2 << 25)
+#define     R200_FOG_USE_SPEC_ALPHA    (3 << 25)
+#define     R200_FOG_USE_VTX_FOG       (4 << 25)
+#define R200_RE_SOLID_COLOR               0x1c1c 
+#define R200_RB3D_BLENDCNTL               0x1c20
+#define     R200_COMB_FCN_MASK                    (7  << 12)
+#define     R200_COMB_FCN_ADD_CLAMP               (0  << 12)
+#define     R200_COMB_FCN_ADD_NOCLAMP             (1  << 12)
+#define     R200_COMB_FCN_SUB_CLAMP               (2  << 12)
+#define     R200_COMB_FCN_SUB_NOCLAMP             (3  << 12)
+#define     R200_COMB_FCN_MIN                     (4  << 12)
+#define     R200_COMB_FCN_MAX                     (5  << 12)
+#define     R200_COMB_FCN_RSUB_CLAMP              (6  << 12)
+#define     R200_COMB_FCN_RSUB_NOCLAMP            (7  << 12)
+#define     R200_SRC_BLEND_GL_ZERO                (32 << 16)
+#define     R200_SRC_BLEND_GL_ONE                 (33 << 16)
+#define     R200_SRC_BLEND_GL_SRC_COLOR           (34 << 16)
+#define     R200_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 16)
+#define     R200_SRC_BLEND_GL_DST_COLOR           (36 << 16)
+#define     R200_SRC_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 16)
+#define     R200_SRC_BLEND_GL_SRC_ALPHA           (38 << 16)
+#define     R200_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 16)
+#define     R200_SRC_BLEND_GL_DST_ALPHA           (40 << 16)
+#define     R200_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 16)
+#define     R200_SRC_BLEND_GL_SRC_ALPHA_SATURATE  (42 << 16)
+#define     R200_SRC_BLEND_GL_CONST_COLOR           (43 << 16)
+#define     R200_SRC_BLEND_GL_ONE_MINUS_CONST_COLOR (44 << 16)
+#define     R200_SRC_BLEND_GL_CONST_ALPHA           (45 << 16)
+#define     R200_SRC_BLEND_GL_ONE_MINUS_CONST_ALPHA (46 << 16)
+#define     R200_SRC_BLEND_MASK                     (63 << 16)
+#define     R200_DST_BLEND_GL_ZERO                (32 << 24)
+#define     R200_DST_BLEND_GL_ONE                 (33 << 24)
+#define     R200_DST_BLEND_GL_SRC_COLOR           (34 << 24)
+#define     R200_DST_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 24)
+#define     R200_DST_BLEND_GL_DST_COLOR           (36 << 24)
+#define     R200_DST_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 24)
+#define     R200_DST_BLEND_GL_SRC_ALPHA           (38 << 24)
+#define     R200_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 24)
+#define     R200_DST_BLEND_GL_DST_ALPHA           (40 << 24)
+#define     R200_DST_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 24)
+#define     R200_DST_BLEND_GL_CONST_COLOR         (43 << 24)
+#define     R200_DST_BLEND_GL_ONE_MINUS_CONST_COLOR (44 << 24)
+#define     R200_DST_BLEND_GL_CONST_ALPHA           (45 << 24)
+#define     R200_DST_BLEND_GL_ONE_MINUS_CONST_ALPHA (46 << 24)
+#define     R200_DST_BLEND_MASK                     (63 << 24)
+#define R200_RB3D_DEPTHOFFSET             0x1c24 
+#define R200_RB3D_DEPTHPITCH              0x1c28 
+#define     R200_DEPTHPITCH_MASK         0x00001ff8
+#define     R200_DEPTH_ENDIAN_NO_SWAP    (0 << 18)
+#define     R200_DEPTH_ENDIAN_WORD_SWAP  (1 << 18)
+#define     R200_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
+#define R200_RB3D_ZSTENCILCNTL            0x1c2c 
+#define     R200_DEPTH_FORMAT_MASK          (0xf << 0)
+#define     R200_DEPTH_FORMAT_16BIT_INT_Z   (0  <<  0)
+#define     R200_DEPTH_FORMAT_24BIT_INT_Z   (2  <<  0)
+#define     R200_DEPTH_FORMAT_24BIT_FLOAT_Z (3  <<  0)
+#define     R200_DEPTH_FORMAT_32BIT_INT_Z   (4  <<  0)
+#define     R200_DEPTH_FORMAT_32BIT_FLOAT_Z (5  <<  0)
+#define     R200_DEPTH_FORMAT_24BIT_FLOAT_W (9  <<  0)
+#define     R200_DEPTH_FORMAT_32BIT_FLOAT_W (11 <<  0)
+#define     R200_Z_TEST_NEVER               (0  <<  4)
+#define     R200_Z_TEST_LESS                (1  <<  4)
+#define     R200_Z_TEST_LEQUAL              (2  <<  4)
+#define     R200_Z_TEST_EQUAL               (3  <<  4)
+#define     R200_Z_TEST_GEQUAL              (4  <<  4)
+#define     R200_Z_TEST_GREATER             (5  <<  4)
+#define     R200_Z_TEST_NEQUAL              (6  <<  4)
+#define     R200_Z_TEST_ALWAYS              (7  <<  4)
+#define     R200_Z_TEST_MASK                (7  <<  4)
+#define     R200_STENCIL_TEST_NEVER         (0  << 12)
+#define     R200_STENCIL_TEST_LESS          (1  << 12)
+#define     R200_STENCIL_TEST_LEQUAL        (2  << 12)
+#define     R200_STENCIL_TEST_EQUAL         (3  << 12)
+#define     R200_STENCIL_TEST_GEQUAL        (4  << 12)
+#define     R200_STENCIL_TEST_GREATER       (5  << 12)
+#define     R200_STENCIL_TEST_NEQUAL        (6  << 12)
+#define     R200_STENCIL_TEST_ALWAYS        (7  << 12)
+#define     R200_STENCIL_TEST_MASK          (0x7 << 12)
+#define     R200_STENCIL_FAIL_KEEP          (0  << 16)
+#define     R200_STENCIL_FAIL_ZERO          (1  << 16)
+#define     R200_STENCIL_FAIL_REPLACE       (2  << 16)
+#define     R200_STENCIL_FAIL_INC           (3  << 16)
+#define     R200_STENCIL_FAIL_DEC           (4  << 16)
+#define     R200_STENCIL_FAIL_INVERT        (5  << 16)
+#define     R200_STENCIL_FAIL_INC_WRAP      (6  << 16)
+#define     R200_STENCIL_FAIL_DEC_WRAP      (7  << 16)
+#define     R200_STENCIL_FAIL_MASK          (0x7 << 16)
+#define     R200_STENCIL_ZPASS_KEEP         (0  << 20)
+#define     R200_STENCIL_ZPASS_ZERO         (1  << 20)
+#define     R200_STENCIL_ZPASS_REPLACE      (2  << 20)
+#define     R200_STENCIL_ZPASS_INC          (3  << 20)
+#define     R200_STENCIL_ZPASS_DEC          (4  << 20)
+#define     R200_STENCIL_ZPASS_INVERT       (5  << 20)
+#define     R200_STENCIL_ZPASS_INC_WRAP     (6  << 20)
+#define     R200_STENCIL_ZPASS_DEC_WRAP     (7  << 20)
+#define     R200_STENCIL_ZPASS_MASK         (0x7 << 20)
+#define     R200_STENCIL_ZFAIL_KEEP         (0  << 24)
+#define     R200_STENCIL_ZFAIL_ZERO         (1  << 24)
+#define     R200_STENCIL_ZFAIL_REPLACE      (2  << 24)
+#define     R200_STENCIL_ZFAIL_INC          (3  << 24)
+#define     R200_STENCIL_ZFAIL_DEC          (4  << 24)
+#define     R200_STENCIL_ZFAIL_INVERT       (5  << 24)
+#define     R200_STENCIL_ZFAIL_INC_WRAP     (6  << 24)
+#define     R200_STENCIL_ZFAIL_DEC_WRAP     (7  << 24)
+#define     R200_STENCIL_ZFAIL_MASK         (0x7 << 24)
+#define     R200_Z_WRITE_ENABLE             (1  << 30)
+/*gap*/
+#define R200_PP_CNTL                      0x1c38 
+#define     R200_TEX_0_ENABLE                         0x00000010
+#define     R200_TEX_1_ENABLE                         0x00000020
+#define     R200_TEX_2_ENABLE                         0x00000040
+#define     R200_TEX_3_ENABLE                         0x00000080
+#define     R200_TEX_4_ENABLE                         0x00000100
+#define     R200_TEX_5_ENABLE                         0x00000200
+#define     R200_TEX_ENABLE_MASK                      0x000003f0
+#define     R200_FILTER_ROUND_MODE_MASK               0x00000400
+#define     R200_TEX_BLEND_7_ENABLE                   0x00000800
+#define     R200_TEX_BLEND_0_ENABLE                   0x00001000
+#define     R200_TEX_BLEND_1_ENABLE                   0x00002000
+#define     R200_TEX_BLEND_2_ENABLE                   0x00004000
+#define     R200_TEX_BLEND_3_ENABLE                   0x00008000
+#define     R200_TEX_BLEND_4_ENABLE                   0x00010000
+#define     R200_TEX_BLEND_5_ENABLE                   0x00020000
+#define     R200_TEX_BLEND_6_ENABLE                   0x00040000
+#define     R200_MULTI_PASS_ENABLE                    0x00080000
+#define     R200_SPECULAR_ENABLE                      0x00200000
+#define     R200_FOG_ENABLE                           0x00400000
+#define     R200_ALPHA_TEST_ENABLE                    0x00800000
+#define     R200_ANTI_ALIAS_NONE                       0x00000000
+#define     R200_ANTI_ALIAS_LINE                       0x01000000
+#define     R200_ANTI_ALIAS_POLY                       0x02000000
+#define     R200_ANTI_ALIAS_MASK                       0x03000000
+#define R200_RB3D_CNTL                    0x1c3c 
+#define     R200_ALPHA_BLEND_ENABLE       (1  <<  0)
+#define     R200_PLANE_MASK_ENABLE        (1  <<  1)
+#define     R200_DITHER_ENABLE            (1  <<  2)
+#define     R200_ROUND_ENABLE             (1  <<  3)
+#define     R200_SCALE_DITHER_ENABLE      (1  <<  4)
+#define     R200_DITHER_INIT              (1  <<  5)
+#define     R200_ROP_ENABLE               (1  <<  6)
+#define     R200_STENCIL_ENABLE           (1  <<  7)
+#define     R200_Z_ENABLE                 (1  <<  8)
+#define     R200_DEPTH_XZ_OFFEST_ENABLE   (1  <<  9) /* NOTE(review): "OFFEST" looks like a typo for "OFFSET"; spelling kept because other files may reference this name */
+#define     R200_COLOR_FORMAT_ARGB1555    (3  << 10)
+#define     R200_COLOR_FORMAT_RGB565      (4  << 10)
+#define     R200_COLOR_FORMAT_ARGB8888    (6  << 10)
+#define     R200_COLOR_FORMAT_RGB332      (7  << 10)
+#define     R200_COLOR_FORMAT_Y8          (8  << 10)
+#define     R200_COLOR_FORMAT_RGB8        (9  << 10)
+#define     R200_COLOR_FORMAT_YUV422_VYUY (11 << 10)
+#define     R200_COLOR_FORMAT_YUV422_YVYU (12 << 10)
+#define     R200_COLOR_FORMAT_aYUV444     (14 << 10)
+#define     R200_COLOR_FORMAT_ARGB4444    (15 << 10)
+#define     R200_CLRCMP_FLIP_ENABLE       (1  << 14)
+#define     R200_SEPARATE_ALPHA_ENABLE    (1  << 16)
+#define R200_RB3D_COLOROFFSET             0x1c40 
+#define     R200_COLOROFFSET_MASK      0xfffffff0
+#define R200_RE_WIDTH_HEIGHT              0x1c44 
+#define     R200_RE_WIDTH_SHIFT        0
+#define     R200_RE_HEIGHT_SHIFT       16
+#define R200_RB3D_COLORPITCH              0x1c48 
+#define     R200_COLORPITCH_MASK         0x00001ff8 /* 8 hex digits for a 32-bit register; same value and form as R200_DEPTHPITCH_MASK */
+#define     R200_COLOR_ENDIAN_NO_SWAP    (0 << 18)
+#define     R200_COLOR_ENDIAN_WORD_SWAP  (1 << 18)
+#define     R200_COLOR_ENDIAN_DWORD_SWAP (2 << 18)
+#define R200_SE_CNTL                      0x1c4c 
+#define     R200_FFACE_CULL_CW          (0 <<  0)
+#define     R200_FFACE_CULL_CCW         (1 <<  0)
+#define     R200_FFACE_CULL_DIR_MASK    (1 <<  0)
+#define     R200_BFACE_CULL             (0 <<  1)
+#define     R200_BFACE_SOLID            (3 <<  1)
+#define     R200_FFACE_CULL             (0 <<  3)
+#define     R200_FFACE_SOLID            (3 <<  3)
+#define     R200_FFACE_CULL_MASK        (3 <<  3)
+#define     R200_FLAT_SHADE_VTX_0       (0 <<  6)
+#define     R200_FLAT_SHADE_VTX_1       (1 <<  6)
+#define     R200_FLAT_SHADE_VTX_2       (2 <<  6)
+#define     R200_FLAT_SHADE_VTX_LAST    (3 <<  6)
+#define     R200_DIFFUSE_SHADE_SOLID    (0 <<  8)
+#define     R200_DIFFUSE_SHADE_FLAT     (1 <<  8)
+#define     R200_DIFFUSE_SHADE_GOURAUD  (2 <<  8)
+#define     R200_DIFFUSE_SHADE_MASK     (3 <<  8)
+#define     R200_ALPHA_SHADE_SOLID      (0 << 10)
+#define     R200_ALPHA_SHADE_FLAT       (1 << 10)
+#define     R200_ALPHA_SHADE_GOURAUD    (2 << 10)
+#define     R200_ALPHA_SHADE_MASK       (3 << 10)
+#define     R200_SPECULAR_SHADE_SOLID   (0 << 12)
+#define     R200_SPECULAR_SHADE_FLAT    (1 << 12)
+#define     R200_SPECULAR_SHADE_GOURAUD (2 << 12)
+#define     R200_SPECULAR_SHADE_MASK    (3 << 12)
+#define     R200_FOG_SHADE_SOLID        (0 << 14)
+#define     R200_FOG_SHADE_FLAT         (1 << 14)
+#define     R200_FOG_SHADE_GOURAUD      (2 << 14)
+#define     R200_FOG_SHADE_MASK         (3 << 14)
+#define     R200_ZBIAS_ENABLE_POINT     (1 << 16)
+#define     R200_ZBIAS_ENABLE_LINE      (1 << 17)
+#define     R200_ZBIAS_ENABLE_TRI       (1 << 18)
+#define     R200_WIDELINE_ENABLE        (1 << 20)
+#define     R200_VTX_PIX_CENTER_D3D     (0 << 27)
+#define     R200_VTX_PIX_CENTER_OGL     (1 << 27)
+#define     R200_ROUND_MODE_TRUNC       (0 << 28)
+#define     R200_ROUND_MODE_ROUND       (1 << 28)
+#define     R200_ROUND_MODE_ROUND_EVEN  (2 << 28)
+#define     R200_ROUND_MODE_ROUND_ODD   (3 << 28)
+#define     R200_ROUND_PREC_16TH_PIX    (0u << 30) /* 2-bit field at bits 30-31: unsigned so values 2 and 3 do not overflow a signed int */
+#define     R200_ROUND_PREC_8TH_PIX     (1u << 30)
+#define     R200_ROUND_PREC_4TH_PIX     (2u << 30)
+#define     R200_ROUND_PREC_HALF_PIX    (3u << 30)
+#define R200_RE_CNTL                      0x1c50 
+#define     R200_STIPPLE_ENABLE                     0x1
+#define     R200_SCISSOR_ENABLE                     0x2
+#define     R200_PATTERN_ENABLE                     0x4
+#define     R200_PERSPECTIVE_ENABLE                 0x8
+#define     R200_POINT_SMOOTH                       0x20
+#define     R200_VTX_STQ0_D3D                       0x00010000
+#define     R200_VTX_STQ1_D3D                       0x00040000
+#define     R200_VTX_STQ2_D3D                       0x00100000
+#define     R200_VTX_STQ3_D3D                       0x00400000
+#define     R200_VTX_STQ4_D3D                       0x01000000
+#define     R200_VTX_STQ5_D3D                       0x04000000
+/* gap */
+#define R200_RE_STIPPLE_ADDR              0x1cc8
+#define R200_RE_STIPPLE_DATA              0x1ccc
+#define R200_RE_LINE_PATTERN              0x1cd0 
+#define     R200_LINE_PATTERN_MASK             0x0000ffff
+#define     R200_LINE_REPEAT_COUNT_SHIFT       16
+#define     R200_LINE_PATTERN_START_SHIFT      24
+#define     R200_LINE_PATTERN_LITTLE_BIT_ORDER (0 << 28)
+#define     R200_LINE_PATTERN_BIG_BIT_ORDER    (1 << 28)
+#define     R200_LINE_PATTERN_AUTO_RESET       (1 << 29)
+#define R200_RE_LINE_STATE                0x1cd4 
+#define     R200_LINE_CURRENT_PTR_SHIFT       0
+#define     R200_LINE_CURRENT_COUNT_SHIFT     8
+#define R200_RE_SCISSOR_TL_0              0x1cd8
+#define R200_RE_SCISSOR_BR_0              0x1cdc
+#define R200_RE_SCISSOR_TL_1              0x1ce0
+#define R200_RE_SCISSOR_BR_1              0x1ce4
+#define R200_RE_SCISSOR_TL_2              0x1ce8
+#define R200_RE_SCISSOR_BR_2              0x1cec
+/* gap */
+#define R200_RB3D_DEPTHXY_OFFSET          0x1d60 
+#define     R200_DEPTHX_SHIFT  0
+#define     R200_DEPTHY_SHIFT  16
+/* gap */
+#define R200_RB3D_STENCILREFMASK          0x1d7c 
+#define     R200_STENCIL_REF_SHIFT           0
+#define     R200_STENCIL_REF_MASK            (0xffu << 0)
+#define     R200_STENCIL_MASK_SHIFT          16
+#define     R200_STENCIL_VALUE_MASK          (0xffu << 16)
+#define     R200_STENCIL_WRITEMASK_SHIFT     24
+#define     R200_STENCIL_WRITE_MASK          (0xffu << 24) /* reaches bit 31: unsigned avoids signed-shift overflow; sibling masks match */
+#define R200_RB3D_ROPCNTL                 0x1d80 
+#define     R200_ROP_MASK                    (15 << 8)
+#define     R200_ROP_CLEAR                   (0  << 8)
+#define     R200_ROP_NOR                     (1  << 8)
+#define     R200_ROP_AND_INVERTED            (2  << 8)
+#define     R200_ROP_COPY_INVERTED           (3  << 8)
+#define     R200_ROP_AND_REVERSE             (4  << 8)
+#define     R200_ROP_INVERT                  (5  << 8)
+#define     R200_ROP_XOR                     (6  << 8)
+#define     R200_ROP_NAND                    (7  << 8)
+#define     R200_ROP_AND                     (8  << 8)
+#define     R200_ROP_EQUIV                   (9  << 8)
+#define     R200_ROP_NOOP                    (10 << 8)
+#define     R200_ROP_OR_INVERTED             (11 << 8)
+#define     R200_ROP_COPY                    (12 << 8)
+#define     R200_ROP_OR_REVERSE              (13 << 8)
+#define     R200_ROP_OR                      (14 << 8)
+#define     R200_ROP_SET                     (15 << 8)
+#define R200_RB3D_PLANEMASK               0x1d84 
+/* gap */
+#define R200_SE_VPORT_XSCALE              0x1d98 
+#define R200_SE_VPORT_XOFFSET             0x1d9c 
+#define R200_SE_VPORT_YSCALE              0x1da0 
+#define R200_SE_VPORT_YOFFSET             0x1da4 
+#define R200_SE_VPORT_ZSCALE              0x1da8 
+#define R200_SE_VPORT_ZOFFSET             0x1dac 
+#define R200_SE_ZBIAS_FACTOR              0x1db0 
+#define R200_SE_ZBIAS_CONSTANT            0x1db4 
+#define R200_SE_LINE_WIDTH                0x1db8 
+#define            R200_LINE_WIDTH_SHIFT                   0x00000000
+#define            R200_MINPOINTSIZE_SHIFT                 0x00000010
+/* gap */
+#define R200_SE_VAP_CNTL                           0x2080
+#define     R200_VAP_TCL_ENABLE                       0x00000001
+#define     R200_VAP_SINGLE_BUF_STATE_ENABLE          0x00000010
+#define     R200_VAP_FORCE_W_TO_ONE                   0x00010000
+#define     R200_VAP_D3D_TEX_DEFAULT                  0x00020000
+#define     R200_VAP_VF_MAX_VTX_NUM__SHIFT            18
+#define     R200_VAP_DX_CLIP_SPACE_DEF                0x00400000
+#define R200_SE_VF_CNTL                           0x2084
+#define     R200_VF_PRIM_NONE                         0x00000000
+#define     R200_VF_PRIM_POINTS                       0x00000001
+#define     R200_VF_PRIM_LINES                        0x00000002
+#define     R200_VF_PRIM_LINE_STRIP                   0x00000003
+#define     R200_VF_PRIM_TRIANGLES                    0x00000004
+#define     R200_VF_PRIM_TRIANGLE_FAN                 0x00000005
+#define     R200_VF_PRIM_TRIANGLE_STRIP               0x00000006
+#define     R200_VF_PRIM_RECT_LIST                    0x00000008
+#define     R200_VF_PRIM_3VRT_POINTS                  0x00000009
+#define     R200_VF_PRIM_3VRT_LINES                   0x0000000a
+#define     R200_VF_PRIM_POINT_SPRITES                0x0000000b
+#define     R200_VF_PRIM_LINE_LOOP                    0x0000000c
+#define     R200_VF_PRIM_QUADS                        0x0000000d
+#define     R200_VF_PRIM_QUAD_STRIP                   0x0000000e
+#define     R200_VF_PRIM_POLYGON                      0x0000000f
+#define     R200_VF_PRIM_MASK                         0x0000000f
+#define     R200_VF_PRIM_WALK_IND                     0x00000010
+#define     R200_VF_PRIM_WALK_LIST                    0x00000020
+#define     R200_VF_PRIM_WALK_RING                    0x00000030
+#define     R200_VF_PRIM_WALK_MASK                    0x00000030
+#define     R200_VF_COLOR_ORDER_RGBA                  0x00000040
+#define     R200_VF_TCL_OUTPUT_VTX_ENABLE             0x00000200
+#define     R200_VF_INDEX_SZ_4                        0x00000800
+#define     R200_VF_VERTEX_NUMBER_MASK                0xffff0000
+#define     R200_VF_VERTEX_NUMBER_SHIFT               16
+#define R200_SE_VTX_FMT_0                 0x2088
+#define     R200_VTX_XY                     0 /* always have xy */
+#define     R200_VTX_Z0                     (1<<0)
+#define     R200_VTX_W0                     (1<<1)
+#define     R200_VTX_WEIGHT_COUNT_SHIFT     (2)
+#define     R200_VTX_PV_MATRIX_SEL          (1<<5)
+#define     R200_VTX_N0                     (1<<6)
+#define     R200_VTX_POINT_SIZE             (1<<7)
+#define     R200_VTX_DISCRETE_FOG           (1<<8)
+#define     R200_VTX_SHININESS_0            (1<<9)
+#define     R200_VTX_SHININESS_1            (1<<10)
+#define       R200_VTX_COLOR_NOT_PRESENT      0
+#define       R200_VTX_PK_RGBA          1
+#define       R200_VTX_FP_RGB           2
+#define       R200_VTX_FP_RGBA          3
+#define       R200_VTX_COLOR_MASK             3
+#define     R200_VTX_COLOR_0_SHIFT          11
+#define     R200_VTX_COLOR_1_SHIFT          13
+#define     R200_VTX_COLOR_2_SHIFT          15
+#define     R200_VTX_COLOR_3_SHIFT          17
+#define     R200_VTX_COLOR_4_SHIFT          19
+#define     R200_VTX_COLOR_5_SHIFT          21
+#define     R200_VTX_COLOR_6_SHIFT          23
+#define     R200_VTX_COLOR_7_SHIFT          25
+#define     R200_VTX_XY1                    (1u<<28)
+#define     R200_VTX_Z1                     (1u<<29)
+#define     R200_VTX_W1                     (1u<<30)
+#define     R200_VTX_N1                     (1u<<31) /* bit 31: unsigned avoids signed-shift overflow; the three flags above match */
+#define R200_SE_VTX_FMT_1                 0x208c
+#define     R200_VTX_TEX0_COMP_CNT_SHIFT        0
+#define     R200_VTX_TEX1_COMP_CNT_SHIFT        3
+#define     R200_VTX_TEX2_COMP_CNT_SHIFT        6
+#define     R200_VTX_TEX3_COMP_CNT_SHIFT        9
+#define     R200_VTX_TEX4_COMP_CNT_SHIFT        12
+#define     R200_VTX_TEX5_COMP_CNT_SHIFT        15
+#define R200_SE_TCL_OUTPUT_VTX_FMT_0      0x2090 
+#define R200_SE_TCL_OUTPUT_VTX_FMT_1      0x2094 
+/* gap */
+#define R200_SE_VTE_CNTL                  0x20b0
+#define     R200_VPORT_X_SCALE_ENA                0x00000001
+#define     R200_VPORT_X_OFFSET_ENA               0x00000002
+#define     R200_VPORT_Y_SCALE_ENA                0x00000004
+#define     R200_VPORT_Y_OFFSET_ENA               0x00000008
+#define     R200_VPORT_Z_SCALE_ENA                0x00000010
+#define     R200_VPORT_Z_OFFSET_ENA               0x00000020
+#define     R200_VTX_XY_FMT                       0x00000100
+#define     R200_VTX_Z_FMT                        0x00000200
+#define     R200_VTX_W0_FMT                       0x00000400
+#define     R200_VTX_W0_NORMALIZE                 0x00000800
+#define     R200_VTX_ST_DENORMALIZED              0x00001000
+/* gap */
+#define R200_SE_VTX_NUM_ARRAYS            0x20c0
+#define R200_SE_VTX_AOS_ATTR01            0x20c4
+#define R200_SE_VTX_AOS_ADDR0             0x20c8
+#define R200_SE_VTX_AOS_ADDR1             0x20cc
+#define R200_SE_VTX_AOS_ATTR23            0x20d0
+#define R200_SE_VTX_AOS_ADDR2             0x20d4
+#define R200_SE_VTX_AOS_ADDR3             0x20d8
+#define R200_SE_VTX_AOS_ATTR45            0x20dc
+#define R200_SE_VTX_AOS_ADDR4             0x20e0
+#define R200_SE_VTX_AOS_ADDR5             0x20e4
+#define R200_SE_VTX_AOS_ATTR67            0x20e8
+#define R200_SE_VTX_AOS_ADDR6             0x20ec
+#define R200_SE_VTX_AOS_ADDR7             0x20f0
+#define R200_SE_VTX_AOS_ATTR89            0x20f4
+#define R200_SE_VTX_AOS_ADDR8             0x20f8
+#define R200_SE_VTX_AOS_ADDR9             0x20fc
+#define R200_SE_VTX_AOS_ATTR1011          0x2100
+#define R200_SE_VTX_AOS_ADDR10            0x2104
+#define R200_SE_VTX_AOS_ADDR11            0x2108
+#define R200_SE_VF_MAX_VTX_INDX           0x210c
+#define R200_SE_VF_MIN_VTX_INDX           0x2110
+/* gap */
+#define R200_SE_VAP_CNTL_STATUS           0x2140
+#define     R200_VC_NO_SWAP                  (0 << 0)
+#define     R200_VC_16BIT_SWAP               (1 << 0)
+#define     R200_VC_32BIT_SWAP               (2 << 0)
+/* gap */
+#define R200_SE_VTX_STATE_CNTL                     0x2180
+#define     R200_VSC_COLOR_0_ASSEMBLY_CNTL_SHIFT    0x00000000
+#define     R200_VSC_COLOR_1_ASSEMBLY_CNTL_SHIFT    0x00000002
+#define     R200_VSC_COLOR_2_ASSEMBLY_CNTL_SHIFT    0x00000004
+#define     R200_VSC_COLOR_3_ASSEMBLY_CNTL_SHIFT    0x00000006
+#define     R200_VSC_COLOR_4_ASSEMBLY_CNTL_SHIFT    0x00000008
+#define     R200_VSC_COLOR_5_ASSEMBLY_CNTL_SHIFT    0x0000000a
+#define     R200_VSC_COLOR_6_ASSEMBLY_CNTL_SHIFT    0x0000000c
+#define     R200_VSC_COLOR_7_ASSEMBLY_CNTL_SHIFT    0x0000000e
+#define     R200_VSC_UPDATE_USER_COLOR_0_ENABLE    0x00010000
+#define     R200_VSC_UPDATE_USER_COLOR_1_ENABLE    0x00020000
+/* gap */
+#define R200_SE_TCL_VECTOR_INDX_REG                0x2200
+#define R200_SE_TCL_VECTOR_DATA_REG                0x2204
+#define R200_SE_TCL_SCALAR_INDX_REG                0x2208
+#define R200_SE_TCL_SCALAR_DATA_REG                0x220c
+/* gap */
+#define R200_SE_TCL_MATRIX_SEL_0                   0x2230
+#define     R200_MODELVIEW_0_SHIFT           (0) 
+#define     R200_MODELVIEW_1_SHIFT           (8) 
+#define     R200_MODELVIEW_2_SHIFT           (16) 
+#define     R200_MODELVIEW_3_SHIFT           (24) 
+#define R200_SE_TCL_MATRIX_SEL_1                   0x2234
+#define     R200_IT_MODELVIEW_0_SHIFT        (0)
+#define     R200_IT_MODELVIEW_1_SHIFT        (8) 
+#define     R200_IT_MODELVIEW_2_SHIFT        (16)
+#define     R200_IT_MODELVIEW_3_SHIFT        (24)
+#define R200_SE_TCL_MATRIX_SEL_2                   0x2238
+#define     R200_MODELPROJECT_0_SHIFT         (0) 
+#define     R200_MODELPROJECT_1_SHIFT         (8) 
+#define     R200_MODELPROJECT_2_SHIFT         (16) 
+#define     R200_MODELPROJECT_3_SHIFT         (24) 
+#define R200_SE_TCL_MATRIX_SEL_3                   0x223c
+#define     R200_TEXMAT_0_SHIFT    0
+#define     R200_TEXMAT_1_SHIFT    8
+#define     R200_TEXMAT_2_SHIFT    16
+#define     R200_TEXMAT_3_SHIFT    24
+#define R200_SE_TCL_MATRIX_SEL_4                   0x2240
+#define     R200_TEXMAT_4_SHIFT    0
+#define     R200_TEXMAT_5_SHIFT    8
+/* gap */
+#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL     0x2250
+#define     R200_OUTPUT_XYZW                    (1<<0)
+#define     R200_OUTPUT_COLOR_0                 (1<<8)
+#define     R200_OUTPUT_COLOR_1                 (1<<9)
+#define     R200_OUTPUT_TEX_0                   (1<<16)
+#define     R200_OUTPUT_TEX_1                   (1<<17)
+#define     R200_OUTPUT_TEX_2                   (1<<18)
+#define     R200_OUTPUT_TEX_3                   (1<<19)
+#define     R200_OUTPUT_TEX_4                   (1<<20)
+#define     R200_OUTPUT_TEX_5                   (1<<21)
+#define     R200_OUTPUT_TEX_MASK                (0x3f<<16)
+#define     R200_OUTPUT_PT_SIZE                 (1<<25)
+#define     R200_FORCE_INORDER_PROC             (1u<<31) /* bit 31: unsigned avoids signed-shift overflow */
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0  0x2254
+#define            R200_VERTEX_POSITION_ADDR__SHIFT     0x00000000
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1  0x2258
+#define            R200_VTX_COLOR_0_ADDR__SHIFT         0x00000000
+#define            R200_VTX_COLOR_1_ADDR__SHIFT         0x00000008
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2  0x225c
+#define            R200_VTX_TEX_0_ADDR__SHIFT           0x00000000
+#define            R200_VTX_TEX_1_ADDR__SHIFT           0x00000008
+#define            R200_VTX_TEX_2_ADDR__SHIFT           0x00000010
+#define            R200_VTX_TEX_3_ADDR__SHIFT           0x00000018
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3  0x2260
+#define            R200_VTX_TEX_4_ADDR__SHIFT           0x00000000
+#define            R200_VTX_TEX_5_ADDR__SHIFT           0x00000008
+
+/* gap */
+#define R200_SE_TCL_LIGHT_MODEL_CTL_0       0x2268 
+#define     R200_LIGHTING_ENABLE                (1<<0)
+#define     R200_LIGHT_IN_MODELSPACE            (1<<1)
+#define     R200_LOCAL_VIEWER                   (1<<2)
+#define     R200_NORMALIZE_NORMALS              (1<<3)
+#define     R200_RESCALE_NORMALS                (1<<4)
+#define     R200_SPECULAR_LIGHTS                (1<<5)
+#define     R200_DIFFUSE_SPECULAR_COMBINE       (1<<6)
+#define     R200_LIGHT_ALPHA                    (1<<7)
+#define     R200_LOCAL_LIGHT_VEC_GL             (1<<8)
+#define     R200_LIGHT_NO_NORMAL_AMBIENT_ONLY   (1<<9)
+#define     R200_LIGHT_TWOSIDE                  (1<<10)
+#define     R200_FRONT_SHININESS_SOURCE_SHIFT       (0xb)
+#define     R200_BACK_SHININESS_SOURCE_SHIFT        (0xd)
+#define       R200_LM0_SOURCE_MATERIAL_0           (0)
+#define       R200_LM0_SOURCE_MATERIAL_1           (1)
+#define       R200_LM0_SOURCE_VERTEX_SHININESS_0   (2)
+#define       R200_LM0_SOURCE_VERTEX_SHININESS_1   (3)
+#define R200_SE_TCL_LIGHT_MODEL_CTL_1       0x226c 
+#define       R200_LM1_SOURCE_LIGHT_PREMULT        (0)
+#define       R200_LM1_SOURCE_MATERIAL_0           (1)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_0       (2)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_1       (3)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_2       (4)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_3       (5)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_4       (6)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_5       (7)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_6       (8)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_7       (9)
+#define       R200_LM1_SOURCE_MATERIAL_1           (0xf)
+#define     R200_FRONT_EMISSIVE_SOURCE_SHIFT        (0)
+#define     R200_FRONT_AMBIENT_SOURCE_SHIFT         (4)
+#define     R200_FRONT_DIFFUSE_SOURCE_SHIFT         (8)
+#define     R200_FRONT_SPECULAR_SOURCE_SHIFT        (12)
+#define     R200_BACK_EMISSIVE_SOURCE_SHIFT         (16)
+#define     R200_BACK_AMBIENT_SOURCE_SHIFT          (20)
+#define     R200_BACK_DIFFUSE_SOURCE_SHIFT          (24)
+#define     R200_BACK_SPECULAR_SOURCE_SHIFT         (28)
+#define R200_SE_TCL_PER_LIGHT_CTL_0       0x2270 
+#define     R200_LIGHT_0_ENABLE                    (1<<0)
+#define     R200_LIGHT_0_ENABLE_AMBIENT            (1<<1)
+#define     R200_LIGHT_0_ENABLE_SPECULAR           (1<<2)
+#define     R200_LIGHT_0_IS_LOCAL                  (1<<3)
+#define     R200_LIGHT_0_IS_SPOT                   (1<<4)
+#define     R200_LIGHT_0_DUAL_CONE                 (1<<5)
+#define     R200_LIGHT_0_ENABLE_RANGE_ATTEN        (1<<6)
+#define     R200_LIGHT_0_CONSTANT_RANGE_ATTEN      (1<<7)
+#define     R200_LIGHT_1_ENABLE                    (1<<16)
+#define     R200_LIGHT_1_ENABLE_AMBIENT            (1<<17)
+#define     R200_LIGHT_1_ENABLE_SPECULAR           (1<<18)
+#define     R200_LIGHT_1_IS_LOCAL                  (1<<19)
+#define     R200_LIGHT_1_IS_SPOT                   (1<<20)
+#define     R200_LIGHT_1_DUAL_CONE                 (1<<21)
+#define     R200_LIGHT_1_ENABLE_RANGE_ATTEN        (1<<22)
+#define     R200_LIGHT_1_CONSTANT_RANGE_ATTEN      (1<<23)
+#define     R200_LIGHT_0_SHIFT                   (0)
+#define     R200_LIGHT_1_SHIFT                   (16)
+#define R200_SE_TCL_PER_LIGHT_CTL_1       0x2274 
+#define     R200_LIGHT_2_SHIFT                   (0)
+#define     R200_LIGHT_3_SHIFT                   (16)
+#define R200_SE_TCL_PER_LIGHT_CTL_2       0x2278 
+#define     R200_LIGHT_4_SHIFT                   (0)
+#define     R200_LIGHT_5_SHIFT                   (16)
+#define R200_SE_TCL_PER_LIGHT_CTL_3       0x227c 
+#define     R200_LIGHT_6_SHIFT                   (0)
+#define     R200_LIGHT_7_SHIFT                   (16)
+/* gap */
+#define R200_SE_TCL_TEX_PROC_CTL_2        0x22a8 /* TEX_PROC_CTL registers are listed in address order, so _2 and _3 come before _0 and _1 */
+#define     R200_TEXGEN_0_COMP_MASK_SHIFT        (0) /* one field per texgen unit 0-5, 4 bits apart */
+#define     R200_TEXGEN_1_COMP_MASK_SHIFT        (4)
+#define     R200_TEXGEN_2_COMP_MASK_SHIFT        (8)
+#define     R200_TEXGEN_3_COMP_MASK_SHIFT        (12)
+#define     R200_TEXGEN_4_COMP_MASK_SHIFT        (16)
+#define     R200_TEXGEN_5_COMP_MASK_SHIFT        (20)
+#define R200_SE_TCL_TEX_PROC_CTL_3        0x22ac /* one field per texgen unit 0-5, 4 bits apart */
+#define     R200_TEXGEN_0_INPUT_TEX_SHIFT        (0)
+#define     R200_TEXGEN_1_INPUT_TEX_SHIFT        (4)
+#define     R200_TEXGEN_2_INPUT_TEX_SHIFT        (8)
+#define     R200_TEXGEN_3_INPUT_TEX_SHIFT        (12)
+#define     R200_TEXGEN_4_INPUT_TEX_SHIFT        (16)
+#define     R200_TEXGEN_5_INPUT_TEX_SHIFT        (20)
+#define R200_SE_TCL_TEX_PROC_CTL_0        0x22b0 /* single-bit enables: bits 0-5, bits 8-13 and bit 16 */
+#define     R200_TEXGEN_TEXMAT_0_ENABLE         (1<<0)
+#define     R200_TEXGEN_TEXMAT_1_ENABLE         (1<<1)
+#define     R200_TEXGEN_TEXMAT_2_ENABLE         (1<<2)
+#define     R200_TEXGEN_TEXMAT_3_ENABLE         (1<<3)
+#define     R200_TEXGEN_TEXMAT_4_ENABLE         (1<<4)
+#define     R200_TEXGEN_TEXMAT_5_ENABLE         (1<<5)
+#define     R200_TEXMAT_0_ENABLE                (1<<8)
+#define     R200_TEXMAT_1_ENABLE                (1<<9)
+#define     R200_TEXMAT_2_ENABLE                (1<<10)
+#define     R200_TEXMAT_3_ENABLE                (1<<11)
+#define     R200_TEXMAT_4_ENABLE                (1<<12)
+#define     R200_TEXMAT_5_ENABLE                (1<<13)
+#define     R200_TEXGEN_FORCE_W_TO_ONE          (1<<16)
+#define R200_SE_TCL_TEX_PROC_CTL_1        0x22b4 /* one 4-bit input selector per texgen unit 0-5 */
+#define       R200_TEXGEN_INPUT_MASK           (0xf) /* unshifted; the INPUT_* codes below are unshifted too */
+#define       R200_TEXGEN_INPUT_TEXCOORD_0     (0)
+#define       R200_TEXGEN_INPUT_TEXCOORD_1     (1)
+#define       R200_TEXGEN_INPUT_TEXCOORD_2     (2)
+#define       R200_TEXGEN_INPUT_TEXCOORD_3     (3)
+#define       R200_TEXGEN_INPUT_TEXCOORD_4     (4)
+#define       R200_TEXGEN_INPUT_TEXCOORD_5     (5)
+#define       R200_TEXGEN_INPUT_OBJ            (8)
+#define       R200_TEXGEN_INPUT_EYE            (9)
+#define       R200_TEXGEN_INPUT_EYE_NORMAL     (0xa)
+#define       R200_TEXGEN_INPUT_EYE_REFLECT    (0xb)
+#define       R200_TEXGEN_INPUT_SPHERE         (0xd) /* codes 6, 7 and 0xc are not defined here */
+#define     R200_TEXGEN_0_INPUT_SHIFT        (0) /* presumably (R200_TEXGEN_INPUT_* << R200_TEXGEN_n_INPUT_SHIFT) selects the input of unit n -- TODO confirm */
+#define     R200_TEXGEN_1_INPUT_SHIFT        (4)
+#define     R200_TEXGEN_2_INPUT_SHIFT        (8)
+#define     R200_TEXGEN_3_INPUT_SHIFT        (12)
+#define     R200_TEXGEN_4_INPUT_SHIFT        (16)
+#define     R200_TEXGEN_5_INPUT_SHIFT        (20)
+#define R200_SE_TC_TEX_CYL_WRAP_CTL       0x22b8 /* NOTE(review): spelled SE_TC where the neighbours use SE_TCL; name kept because users may reference it */
+/* gap */
+#define R200_SE_TCL_UCP_VERT_BLEND_CTL    0x22c0 /* fields below: UCP bits 0-7, fog mode bits 8-9, bits 10-11, cull bits 28-30 */
+#define     R200_UCP_IN_CLIP_SPACE              (1<<0)
+#define     R200_UCP_IN_MODEL_SPACE             (1<<1)
+#define     R200_UCP_ENABLE_0                   (1<<2) /* UCP_ENABLE_n is bit (2 + n), n = 0..5 */
+#define     R200_UCP_ENABLE_1                   (1<<3)
+#define     R200_UCP_ENABLE_2                   (1<<4)
+#define     R200_UCP_ENABLE_3                   (1<<5)
+#define     R200_UCP_ENABLE_4                   (1<<6)
+#define     R200_UCP_ENABLE_5                   (1<<7)
+#define     R200_TCL_FOG_MASK                   (3<<8) /* 2-bit field; the four TCL_FOG_* values below enumerate it */
+#define     R200_TCL_FOG_DISABLE                (0<<8)
+#define     R200_TCL_FOG_EXP                    (1<<8)
+#define     R200_TCL_FOG_EXP2                   (2<<8)
+#define     R200_TCL_FOG_LINEAR                 (3<<8)
+#define     R200_RNG_BASED_FOG                  (1<<10)
+#define     R200_CLIP_DISABLE                   (1<<11)
+#define     R200_CULL_FRONT_IS_CW               (0<<28) /* evaluates to 0, so it cannot be tested with '&'; test R200_CULL_FRONT_IS_CCW instead */
+#define     R200_CULL_FRONT_IS_CCW              (1<<28)
+#define     R200_CULL_FRONT                     (1<<29)
+#define     R200_CULL_BACK                      (1<<30)
+#define R200_SE_TCL_POINT_SPRITE_CNTL     0x22c4 /* no bit fields are defined for this register here */
+/* gap */
+#define R200_SE_VTX_ST_POS_0_X_4                   0x2300 /* SE_VTX_ST_* offsets run 0x2300..0x249c in steps of 4 (0x243c is skipped) */
+#define R200_SE_VTX_ST_POS_0_Y_4                   0x2304
+#define R200_SE_VTX_ST_POS_0_Z_4                   0x2308
+#define R200_SE_VTX_ST_POS_0_W_4                   0x230c
+#define R200_SE_VTX_ST_NORM_0_X                    0x2310
+#define R200_SE_VTX_ST_NORM_0_Y                    0x2314
+#define R200_SE_VTX_ST_NORM_0_Z                    0x2318
+#define R200_SE_VTX_ST_PVMS                        0x231c
+#define R200_SE_VTX_ST_CLR_0_R                     0x2320 /* CLR_n_{R,G,B,A} at 0x2320 + 0x10 * n, n = 0..7 */
+#define R200_SE_VTX_ST_CLR_0_G                     0x2324
+#define R200_SE_VTX_ST_CLR_0_B                     0x2328
+#define R200_SE_VTX_ST_CLR_0_A                     0x232c
+#define R200_SE_VTX_ST_CLR_1_R                     0x2330
+#define R200_SE_VTX_ST_CLR_1_G                     0x2334
+#define R200_SE_VTX_ST_CLR_1_B                     0x2338
+#define R200_SE_VTX_ST_CLR_1_A                     0x233c
+#define R200_SE_VTX_ST_CLR_2_R                     0x2340
+#define R200_SE_VTX_ST_CLR_2_G                     0x2344
+#define R200_SE_VTX_ST_CLR_2_B                     0x2348
+#define R200_SE_VTX_ST_CLR_2_A                     0x234c
+#define R200_SE_VTX_ST_CLR_3_R                     0x2350
+#define R200_SE_VTX_ST_CLR_3_G                     0x2354
+#define R200_SE_VTX_ST_CLR_3_B                     0x2358
+#define R200_SE_VTX_ST_CLR_3_A                     0x235c
+#define R200_SE_VTX_ST_CLR_4_R                     0x2360
+#define R200_SE_VTX_ST_CLR_4_G                     0x2364
+#define R200_SE_VTX_ST_CLR_4_B                     0x2368
+#define R200_SE_VTX_ST_CLR_4_A                     0x236c
+#define R200_SE_VTX_ST_CLR_5_R                     0x2370
+#define R200_SE_VTX_ST_CLR_5_G                     0x2374
+#define R200_SE_VTX_ST_CLR_5_B                     0x2378
+#define R200_SE_VTX_ST_CLR_5_A                     0x237c
+#define R200_SE_VTX_ST_CLR_6_R                     0x2380
+#define R200_SE_VTX_ST_CLR_6_G                     0x2384
+#define R200_SE_VTX_ST_CLR_6_B                     0x2388
+#define R200_SE_VTX_ST_CLR_6_A                     0x238c
+#define R200_SE_VTX_ST_CLR_7_R                     0x2390
+#define R200_SE_VTX_ST_CLR_7_G                     0x2394
+#define R200_SE_VTX_ST_CLR_7_B                     0x2398
+#define R200_SE_VTX_ST_CLR_7_A                     0x239c
+#define R200_SE_VTX_ST_TEX_0_S                     0x23a0 /* TEX_n_{S,T,R,Q} at 0x23a0 + 0x10 * n, n = 0..5 */
+#define R200_SE_VTX_ST_TEX_0_T                     0x23a4
+#define R200_SE_VTX_ST_TEX_0_R                     0x23a8
+#define R200_SE_VTX_ST_TEX_0_Q                     0x23ac
+#define R200_SE_VTX_ST_TEX_1_S                     0x23b0
+#define R200_SE_VTX_ST_TEX_1_T                     0x23b4
+#define R200_SE_VTX_ST_TEX_1_R                     0x23b8
+#define R200_SE_VTX_ST_TEX_1_Q                     0x23bc
+#define R200_SE_VTX_ST_TEX_2_S                     0x23c0
+#define R200_SE_VTX_ST_TEX_2_T                     0x23c4
+#define R200_SE_VTX_ST_TEX_2_R                     0x23c8
+#define R200_SE_VTX_ST_TEX_2_Q                     0x23cc
+#define R200_SE_VTX_ST_TEX_3_S                     0x23d0
+#define R200_SE_VTX_ST_TEX_3_T                     0x23d4
+#define R200_SE_VTX_ST_TEX_3_R                     0x23d8
+#define R200_SE_VTX_ST_TEX_3_Q                     0x23dc
+#define R200_SE_VTX_ST_TEX_4_S                     0x23e0
+#define R200_SE_VTX_ST_TEX_4_T                     0x23e4
+#define R200_SE_VTX_ST_TEX_4_R                     0x23e8
+#define R200_SE_VTX_ST_TEX_4_Q                     0x23ec
+#define R200_SE_VTX_ST_TEX_5_S                     0x23f0
+#define R200_SE_VTX_ST_TEX_5_T                     0x23f4
+#define R200_SE_VTX_ST_TEX_5_R                     0x23f8
+#define R200_SE_VTX_ST_TEX_5_Q                     0x23fc
+#define R200_SE_VTX_ST_PNT_SPRT_SZ                 0x2400
+#define R200_SE_VTX_ST_DISC_FOG                    0x2404
+#define R200_SE_VTX_ST_SHININESS_0                 0x2408
+#define R200_SE_VTX_ST_SHININESS_1                 0x240c
+#define R200_SE_VTX_ST_BLND_WT_0                   0x2410
+#define R200_SE_VTX_ST_BLND_WT_1                   0x2414
+#define R200_SE_VTX_ST_BLND_WT_2                   0x2418
+#define R200_SE_VTX_ST_BLND_WT_3                   0x241c
+#define R200_SE_VTX_ST_POS_1_X                     0x2420
+#define R200_SE_VTX_ST_POS_1_Y                     0x2424
+#define R200_SE_VTX_ST_POS_1_Z                     0x2428
+#define R200_SE_VTX_ST_POS_1_W                     0x242c
+#define R200_SE_VTX_ST_NORM_1_X                    0x2430
+#define R200_SE_VTX_ST_NORM_1_Y                    0x2434
+#define R200_SE_VTX_ST_NORM_1_Z                    0x2438 /* next define is at 0x2440: offset 0x243c has no name here */
+#define R200_SE_VTX_ST_USR_CLR_0_R                 0x2440
+#define R200_SE_VTX_ST_USR_CLR_0_G                 0x2444
+#define R200_SE_VTX_ST_USR_CLR_0_B                 0x2448
+#define R200_SE_VTX_ST_USR_CLR_0_A                 0x244c
+#define R200_SE_VTX_ST_USR_CLR_1_R                 0x2450
+#define R200_SE_VTX_ST_USR_CLR_1_G                 0x2454
+#define R200_SE_VTX_ST_USR_CLR_1_B                 0x2458
+#define R200_SE_VTX_ST_USR_CLR_1_A                 0x245c
+#define R200_SE_VTX_ST_CLR_0_PKD                   0x2460 /* CLR_n_PKD at 0x2460 + 4 * n, n = 0..7 */
+#define R200_SE_VTX_ST_CLR_1_PKD                   0x2464
+#define R200_SE_VTX_ST_CLR_2_PKD                   0x2468
+#define R200_SE_VTX_ST_CLR_3_PKD                   0x246c
+#define R200_SE_VTX_ST_CLR_4_PKD                   0x2470
+#define R200_SE_VTX_ST_CLR_5_PKD                   0x2474
+#define R200_SE_VTX_ST_CLR_6_PKD                   0x2478
+#define R200_SE_VTX_ST_CLR_7_PKD                   0x247c
+#define R200_SE_VTX_ST_POS_0_X_2                   0x2480
+#define R200_SE_VTX_ST_POS_0_Y_2                   0x2484
+#define R200_SE_VTX_ST_PAR_CLR_LD                  0x2488
+#define R200_SE_VTX_ST_USR_CLR_PKD                 0x248c
+#define R200_SE_VTX_ST_POS_0_X_3                   0x2490
+#define R200_SE_VTX_ST_POS_0_Y_3                   0x2494
+#define R200_SE_VTX_ST_POS_0_Z_3                   0x2498
+#define R200_SE_VTX_ST_END_OF_PKT                  0x249c
+/* gap */
+#define R200_RE_POINTSIZE                          0x2648
+#define     R200_POINTSIZE_SHIFT                       0
+#define     R200_MAXPOINTSIZE_SHIFT                    16 /* second field starts at bit 16; no width masks are defined for either field */
+/* gap */
+#define R200_RE_TOP_LEFT                  0x26c0 /* LEFT field at bit 0, TOP field at bit 16 */
+#define     R200_RE_LEFT_SHIFT         0
+#define     R200_RE_TOP_SHIFT          16
+#define R200_RE_MISC                      0x26c4 /* stipple X/Y offsets (bits 0-4, 8-12) and bit order (bit 16) */
+#define     R200_STIPPLE_COORD_MASK           0x1f /* unshifted 5-bit mask, same width as the two OFFSET masks below */
+#define     R200_STIPPLE_X_OFFSET_SHIFT       0
+#define     R200_STIPPLE_X_OFFSET_MASK        (0x1f << 0)
+#define     R200_STIPPLE_Y_OFFSET_SHIFT       8
+#define     R200_STIPPLE_Y_OFFSET_MASK        (0x1f << 8)
+#define     R200_STIPPLE_LITTLE_BIT_ORDER     (0 << 16) /* evaluates to 0; only BIG_BIT_ORDER can be tested with '&' */
+#define     R200_STIPPLE_BIG_BIT_ORDER        (1 << 16)
+/* gap */
+#define R200_RE_AUX_SCISSOR_CNTL                   0x26f0
+#define     R200_EXCLUSIVE_SCISSOR_0      0x01000000 /* bits 24-26: EXCLUSIVE_SCISSOR_0..2 */
+#define     R200_EXCLUSIVE_SCISSOR_1      0x02000000
+#define     R200_EXCLUSIVE_SCISSOR_2      0x04000000
+#define     R200_SCISSOR_ENABLE_0         0x10000000 /* bits 28-30: SCISSOR_ENABLE_0..2 */
+#define     R200_SCISSOR_ENABLE_1         0x20000000
+#define     R200_SCISSOR_ENABLE_2         0x40000000
+/* gap */
+#define R200_PP_TXFILTER_0                0x2c00 
+#define     R200_MAG_FILTER_NEAREST                   (0  <<  0)
+#define     R200_MAG_FILTER_LINEAR                    (1  <<  0)
+#define     R200_MAG_FILTER_MASK                      (1  <<  0)
+#define     R200_MIN_FILTER_NEAREST                   (0  <<  1)
+#define     R200_MIN_FILTER_LINEAR                    (1  <<  1)
+#define     R200_MIN_FILTER_NEAREST_MIP_NEAREST       (2  <<  1)
+#define     R200_MIN_FILTER_NEAREST_MIP_LINEAR        (3  <<  1)
+#define     R200_MIN_FILTER_LINEAR_MIP_NEAREST        (6  <<  1)
+#define     R200_MIN_FILTER_LINEAR_MIP_LINEAR         (7  <<  1)
+#define     R200_MIN_FILTER_ANISO_NEAREST             (8  <<  1)
+#define     R200_MIN_FILTER_ANISO_LINEAR              (9  <<  1)
+#define     R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 <<  1)
+#define     R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR  (11 <<  1)
+#define     R200_MIN_FILTER_MASK                      (15 <<  1)
+#define     R200_MAX_ANISO_1_TO_1                     (0  <<  5)
+#define     R200_MAX_ANISO_2_TO_1                     (1  <<  5)
+#define     R200_MAX_ANISO_4_TO_1                     (2  <<  5)
+#define     R200_MAX_ANISO_8_TO_1                     (3  <<  5)
+#define     R200_MAX_ANISO_16_TO_1                    (4  <<  5)
+#define     R200_MAX_ANISO_MASK                       (7  <<  5)
+#define     R200_MAX_MIP_LEVEL_MASK                   (0x0f << 16)
+#define     R200_MAX_MIP_LEVEL_SHIFT                  16
+#define     R200_YUV_TO_RGB                           (1  << 20)
+#define     R200_YUV_TEMPERATURE_COOL                 (0  << 21)
+#define     R200_YUV_TEMPERATURE_HOT                  (1  << 21)
+#define     R200_YUV_TEMPERATURE_MASK                 (1  << 21)
+#define     R200_WRAPEN_S                             (1  << 22)
+#define     R200_CLAMP_S_WRAP                         (0  << 23)
+#define     R200_CLAMP_S_MIRROR                       (1  << 23)
+#define     R200_CLAMP_S_CLAMP_LAST                   (2  << 23)
+#define     R200_CLAMP_S_MIRROR_CLAMP_LAST            (3  << 23)
+#define     R200_CLAMP_S_CLAMP_BORDER                 (4  << 23)
+#define     R200_CLAMP_S_MIRROR_CLAMP_BORDER          (5  << 23)
+#define     R200_CLAMP_S_CLAMP_GL                     (6  << 23)
+#define     R200_CLAMP_S_MIRROR_CLAMP_GL              (7  << 23)
+#define     R200_CLAMP_S_MASK                         (7  << 23)
+#define     R200_WRAPEN_T                             (1  << 26)
+#define     R200_CLAMP_T_WRAP                         (0  << 27)
+#define     R200_CLAMP_T_MIRROR                       (1  << 27)
+#define     R200_CLAMP_T_CLAMP_LAST                   (2  << 27)
+#define     R200_CLAMP_T_MIRROR_CLAMP_LAST            (3  << 27)
+#define     R200_CLAMP_T_CLAMP_BORDER                 (4  << 27)
+#define     R200_CLAMP_T_MIRROR_CLAMP_BORDER          (5  << 27)
+#define     R200_CLAMP_T_CLAMP_GL                     (6  << 27)
+#define     R200_CLAMP_T_MIRROR_CLAMP_GL              (7  << 27)
+#define     R200_CLAMP_T_MASK                         (7  << 27)
+#define     R200_KILL_LT_ZERO                         (1  << 30)
+#define     R200_BORDER_MODE_OGL                      (0  << 31)
+#define     R200_BORDER_MODE_D3D                      (1  << 31)
+#define R200_PP_TXFORMAT_0                0x2c04 /* fields: format bits 0-4, flags 6-7, width 8-11, height 12-15, F5 width/height 16-23, ST route 24-26, flags 28-30 */
+#define     R200_TXFORMAT_I8                 (0  <<  0)
+#define     R200_TXFORMAT_AI88               (1  <<  0)
+#define     R200_TXFORMAT_RGB332             (2  <<  0)
+#define     R200_TXFORMAT_ARGB1555           (3  <<  0)
+#define     R200_TXFORMAT_RGB565             (4  <<  0)
+#define     R200_TXFORMAT_ARGB4444           (5  <<  0)
+#define     R200_TXFORMAT_ARGB8888           (6  <<  0)
+#define     R200_TXFORMAT_RGBA8888           (7  <<  0)
+#define     R200_TXFORMAT_Y8                 (8  <<  0)
+#define     R200_TXFORMAT_AVYU4444           (9  <<  0)
+#define     R200_TXFORMAT_VYUY422            (10  <<  0)
+#define     R200_TXFORMAT_YVYU422            (11  <<  0)
+#define     R200_TXFORMAT_DXT1               (12  <<  0)
+#define     R200_TXFORMAT_DXT23              (14  <<  0) /* format code 13 is not defined here */
+#define     R200_TXFORMAT_DXT45              (15  <<  0)
+#define     R200_TXFORMAT_FORMAT_MASK        (31 <<  0) /* 5-bit mask, although the codes above only reach 15 */
+#define     R200_TXFORMAT_FORMAT_SHIFT       0
+#define     R200_TXFORMAT_ALPHA_IN_MAP       (1  <<  6)
+#define     R200_TXFORMAT_NON_POWER2         (1  <<  7)
+#define     R200_TXFORMAT_WIDTH_MASK         (15 <<  8)
+#define     R200_TXFORMAT_WIDTH_SHIFT        8
+#define     R200_TXFORMAT_HEIGHT_MASK        (15 << 12)
+#define     R200_TXFORMAT_HEIGHT_SHIFT       12
+#define     R200_TXFORMAT_F5_WIDTH_MASK      (15 << 16)        /* cube face 5 */
+#define     R200_TXFORMAT_F5_WIDTH_SHIFT     16
+#define     R200_TXFORMAT_F5_HEIGHT_MASK     (15 << 20)
+#define     R200_TXFORMAT_F5_HEIGHT_SHIFT    20
+#define     R200_TXFORMAT_ST_ROUTE_STQ0      (0  << 24) /* 3-bit route field; codes 0-5 are named, 6 and 7 are not */
+#define     R200_TXFORMAT_ST_ROUTE_STQ1      (1  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_STQ2      (2  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_STQ3      (3  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_STQ4      (4  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_STQ5      (5  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_MASK      (7  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_SHIFT     24
+#define     R200_TXFORMAT_ALPHA_MASK_ENABLE  (1  << 28)
+#define     R200_TXFORMAT_CHROMA_KEY_ENABLE  (1  << 29)
+#define     R200_TXFORMAT_CUBIC_MAP_ENABLE   (1  << 30)
+#define R200_PP_TXFORMAT_X_0              0x2c08 /* fields: depth log2 0-3, volume filter 4, Q wrap/clamp 8-11, min mip 12-15, texcoord 16-18, LOD bias 19-31 */
+#define     R200_DEPTH_LOG2_MASK                      (0xf << 0)
+#define     R200_DEPTH_LOG2_SHIFT                     0
+#define     R200_VOLUME_FILTER_SHIFT                  4
+#define     R200_VOLUME_FILTER_MASK                   (1 << 4)
+#define     R200_VOLUME_FILTER_NEAREST                (0 << 4)
+#define     R200_VOLUME_FILTER_LINEAR                 (1 << 4)
+#define     R200_WRAPEN_Q                             (1  << 8)
+#define     R200_CLAMP_Q_WRAP                         (0  << 9) /* 3-bit Q clamp mode in bits 9-11 */
+#define     R200_CLAMP_Q_MIRROR                       (1  << 9)
+#define     R200_CLAMP_Q_CLAMP_LAST                   (2  << 9)
+#define     R200_CLAMP_Q_MIRROR_CLAMP_LAST            (3  << 9)
+#define     R200_CLAMP_Q_CLAMP_BORDER                 (4  << 9)
+#define     R200_CLAMP_Q_MIRROR_CLAMP_BORDER          (5  << 9)
+#define     R200_CLAMP_Q_CLAMP_GL                     (6  << 9)
+#define     R200_CLAMP_Q_MIRROR_CLAMP_GL              (7  << 9)
+#define     R200_CLAMP_Q_MASK                         (7  << 9)
+#define     R200_MIN_MIP_LEVEL_MASK                   (0xf << 12) /* was (0xff << 12), which overlapped TEXCOORD_MASK and LOD_BIAS_MASK; NOTE(review): 4-bit width inferred from the neighbouring fields -- confirm */
+#define     R200_MIN_MIP_LEVEL_SHIFT                  12
+#define     R200_TEXCOORD_NONPROJ                     (0  << 16) /* 3-bit texcoord mode in bits 16-18 */
+#define     R200_TEXCOORD_CUBIC_ENV                   (1  << 16)
+#define     R200_TEXCOORD_VOLUME                      (2  << 16)
+#define     R200_TEXCOORD_PROJ                        (3  << 16)
+#define     R200_TEXCOORD_DEPTH                       (4  << 16)
+#define     R200_TEXCOORD_1D_PROJ                     (5  << 16)
+#define     R200_TEXCOORD_1D                          (6  << 16)
+#define     R200_TEXCOORD_ZERO                        (7  << 16)
+#define     R200_TEXCOORD_MASK                        (7  << 16)
+#define     R200_LOD_BIAS_MASK                        (0xfff80000) /* bits 19-31, matching R200_LOD_BIAS_SHIFT */
+#define     R200_LOD_BIAS_SHIFT                       19
+#define R200_PP_TXSIZE_0                  0x2c0c /* NPOT only */
+#define R200_PP_TXPITCH_0                 0x2c10 /* NPOT only */
+#define R200_PP_BORDER_COLOR_0            0x2c14
+#define R200_PP_CUBIC_FACES_0             0x2c18 /* eight 4-bit fields: width/height for faces 1-4, lowest bits first */
+#define     R200_FACE_WIDTH_1_SHIFT                   0
+#define     R200_FACE_HEIGHT_1_SHIFT                  4
+#define     R200_FACE_WIDTH_1_MASK                   (0xf << 0)
+#define     R200_FACE_HEIGHT_1_MASK                  (0xf << 4)
+#define     R200_FACE_WIDTH_2_SHIFT                   8
+#define     R200_FACE_HEIGHT_2_SHIFT                 12
+#define     R200_FACE_WIDTH_2_MASK                   (0xf << 8)
+#define     R200_FACE_HEIGHT_2_MASK                  (0xf << 12)
+#define     R200_FACE_WIDTH_3_SHIFT                  16
+#define     R200_FACE_HEIGHT_3_SHIFT                 20
+#define     R200_FACE_WIDTH_3_MASK                   (0xf << 16)
+#define     R200_FACE_HEIGHT_3_MASK                  (0xf << 20)
+#define     R200_FACE_WIDTH_4_SHIFT                  24
+#define     R200_FACE_HEIGHT_4_SHIFT                 28
+#define     R200_FACE_WIDTH_4_MASK                   (0xf << 24)
+#define     R200_FACE_HEIGHT_4_MASK                  (0xfU << 28) /* unsigned: (0xf << 28) overflows a signed int */
+#define R200_PP_TXFILTER_1                0x2c20 /* units 1-5: unit-0 register set repeated at 0x2c00 + 0x20 * unit, in address order */
+#define R200_PP_TXFORMAT_1                0x2c24
+#define R200_PP_TXFORMAT_X_1              0x2c28
+#define R200_PP_TXSIZE_1                  0x2c2c
+#define R200_PP_TXPITCH_1                 0x2c30
+#define R200_PP_BORDER_COLOR_1            0x2c34
+#define R200_PP_CUBIC_FACES_1             0x2c38
+#define R200_PP_TXFILTER_2                0x2c40
+#define R200_PP_TXFORMAT_2                0x2c44
+#define R200_PP_TXFORMAT_X_2              0x2c48
+#define R200_PP_TXSIZE_2                  0x2c4c
+#define R200_PP_TXPITCH_2                 0x2c50
+#define R200_PP_BORDER_COLOR_2            0x2c54
+#define R200_PP_CUBIC_FACES_2             0x2c58
+#define R200_PP_TXFILTER_3                0x2c60
+#define R200_PP_TXFORMAT_3                0x2c64
+#define R200_PP_TXFORMAT_X_3              0x2c68
+#define R200_PP_TXSIZE_3                  0x2c6c
+#define R200_PP_TXPITCH_3                 0x2c70
+#define R200_PP_BORDER_COLOR_3            0x2c74
+#define R200_PP_CUBIC_FACES_3             0x2c78
+#define R200_PP_TXFILTER_4                0x2c80
+#define R200_PP_TXFORMAT_4                0x2c84
+#define R200_PP_TXFORMAT_X_4              0x2c88
+#define R200_PP_TXSIZE_4                  0x2c8c
+#define R200_PP_TXPITCH_4                 0x2c90
+#define R200_PP_BORDER_COLOR_4            0x2c94
+#define R200_PP_CUBIC_FACES_4             0x2c98
+#define R200_PP_TXFILTER_5                0x2ca0
+#define R200_PP_TXFORMAT_5                0x2ca4
+#define R200_PP_TXFORMAT_X_5              0x2ca8
+#define R200_PP_TXSIZE_5                  0x2cac
+#define R200_PP_TXPITCH_5                 0x2cb0
+#define R200_PP_BORDER_COLOR_5            0x2cb4
+#define R200_PP_CUBIC_FACES_5             0x2cb8
+/* gap */
+#define R200_PP_CNTL_X             0x2cc4 /* no bit fields are defined for this register here */
+/* gap */
+#define R200_PP_TXOFFSET_0                0x2d00 /* per unit n: TXOFFSET_n at 0x2d00 + 0x18 * n, followed by CUBIC_OFFSET_F1..F5_n */
+#define     R200_TXO_ENDIAN_NO_SWAP     (0 << 0) /* 2-bit swap selector in bits 0-1 */
+#define     R200_TXO_ENDIAN_BYTE_SWAP   (1 << 0)
+#define     R200_TXO_ENDIAN_WORD_SWAP   (2 << 0)
+#define     R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+#define     R200_TXO_OFFSET_MASK        0xffffffe0 /* bits 5-31, consistent with R200_TXO_OFFSET_SHIFT */
+#define     R200_TXO_OFFSET_SHIFT       5
+#define R200_PP_CUBIC_OFFSET_F1_0         0x2d04
+#define R200_PP_CUBIC_OFFSET_F2_0         0x2d08
+#define R200_PP_CUBIC_OFFSET_F3_0         0x2d0c
+#define R200_PP_CUBIC_OFFSET_F4_0         0x2d10
+#define R200_PP_CUBIC_OFFSET_F5_0         0x2d14
+#define R200_PP_TXOFFSET_1                0x2d18
+#define R200_PP_CUBIC_OFFSET_F1_1         0x2d1c
+#define R200_PP_CUBIC_OFFSET_F2_1         0x2d20
+#define R200_PP_CUBIC_OFFSET_F3_1         0x2d24
+#define R200_PP_CUBIC_OFFSET_F4_1         0x2d28
+#define R200_PP_CUBIC_OFFSET_F5_1         0x2d2c
+#define R200_PP_TXOFFSET_2                0x2d30
+#define R200_PP_CUBIC_OFFSET_F1_2         0x2d34
+#define R200_PP_CUBIC_OFFSET_F2_2         0x2d38
+#define R200_PP_CUBIC_OFFSET_F3_2         0x2d3c
+#define R200_PP_CUBIC_OFFSET_F4_2         0x2d40
+#define R200_PP_CUBIC_OFFSET_F5_2         0x2d44
+#define R200_PP_TXOFFSET_3                0x2d48
+#define R200_PP_CUBIC_OFFSET_F1_3         0x2d4c
+#define R200_PP_CUBIC_OFFSET_F2_3         0x2d50
+#define R200_PP_CUBIC_OFFSET_F3_3         0x2d54
+#define R200_PP_CUBIC_OFFSET_F4_3         0x2d58
+#define R200_PP_CUBIC_OFFSET_F5_3         0x2d5c
+#define R200_PP_TXOFFSET_4                0x2d60
+#define R200_PP_CUBIC_OFFSET_F1_4         0x2d64
+#define R200_PP_CUBIC_OFFSET_F2_4         0x2d68
+#define R200_PP_CUBIC_OFFSET_F3_4         0x2d6c
+#define R200_PP_CUBIC_OFFSET_F4_4         0x2d70
+#define R200_PP_CUBIC_OFFSET_F5_4         0x2d74
+#define R200_PP_TXOFFSET_5                0x2d78
+#define R200_PP_CUBIC_OFFSET_F1_5         0x2d7c
+#define R200_PP_CUBIC_OFFSET_F2_5         0x2d80
+#define R200_PP_CUBIC_OFFSET_F3_5         0x2d84
+#define R200_PP_CUBIC_OFFSET_F4_5         0x2d88
+#define R200_PP_CUBIC_OFFSET_F5_5         0x2d8c
+/* gap */
+#define R200_PP_TAM_DEBUG3                0x2d9c /* no bit fields are defined for this register here */
+/* gap */
+#define R200_PP_TFACTOR_0                 0x2ee0 /* TFACTOR_n at 0x2ee0 + 4 * n, n = 0..5 */
+#define R200_PP_TFACTOR_1                 0x2ee4
+#define R200_PP_TFACTOR_2                 0x2ee8
+#define R200_PP_TFACTOR_3                 0x2eec
+#define R200_PP_TFACTOR_4                 0x2ef0
+#define R200_PP_TFACTOR_5                 0x2ef4
+/* gap */
+#define R200_PP_TXCBLEND_0                0x2f00 /* fields: ARG_A bits 0-4, ARG_B 5-9, ARG_C 10-14, per-argument modifiers 16-27, OP 28-30 */
+#define     R200_TXC_ARG_A_ZERO                (0) /* 5-bit source selector; codes 1 and 22-25 are not defined here */
+#define     R200_TXC_ARG_A_CURRENT_COLOR       (2)
+#define     R200_TXC_ARG_A_CURRENT_ALPHA       (3)
+#define     R200_TXC_ARG_A_DIFFUSE_COLOR       (4)
+#define     R200_TXC_ARG_A_DIFFUSE_ALPHA       (5)
+#define     R200_TXC_ARG_A_SPECULAR_COLOR      (6)
+#define     R200_TXC_ARG_A_SPECULAR_ALPHA      (7)
+#define     R200_TXC_ARG_A_TFACTOR_COLOR       (8)
+#define     R200_TXC_ARG_A_TFACTOR_ALPHA       (9)
+#define     R200_TXC_ARG_A_R0_COLOR            (10)
+#define     R200_TXC_ARG_A_R0_ALPHA            (11)
+#define     R200_TXC_ARG_A_R1_COLOR            (12)
+#define     R200_TXC_ARG_A_R1_ALPHA            (13)
+#define     R200_TXC_ARG_A_R2_COLOR            (14)
+#define     R200_TXC_ARG_A_R2_ALPHA            (15)
+#define     R200_TXC_ARG_A_R3_COLOR            (16)
+#define     R200_TXC_ARG_A_R3_ALPHA            (17)
+#define     R200_TXC_ARG_A_R4_COLOR            (18)
+#define     R200_TXC_ARG_A_R4_ALPHA            (19)
+#define     R200_TXC_ARG_A_R5_COLOR            (20)
+#define     R200_TXC_ARG_A_R5_ALPHA            (21)
+#define     R200_TXC_ARG_A_TFACTOR1_COLOR      (26)
+#define     R200_TXC_ARG_A_TFACTOR1_ALPHA      (27)
+#define     R200_TXC_ARG_A_MASK                        (31 << 0)
+#define     R200_TXC_ARG_A_SHIFT                       0
+#define     R200_TXC_ARG_B_ZERO                (0<<5) /* ARG_B_* = the ARG_A_* selector codes shifted left by 5 */
+#define     R200_TXC_ARG_B_CURRENT_COLOR       (2<<5)
+#define     R200_TXC_ARG_B_CURRENT_ALPHA       (3<<5)
+#define     R200_TXC_ARG_B_DIFFUSE_COLOR       (4<<5)
+#define     R200_TXC_ARG_B_DIFFUSE_ALPHA       (5<<5)
+#define     R200_TXC_ARG_B_SPECULAR_COLOR      (6<<5)
+#define     R200_TXC_ARG_B_SPECULAR_ALPHA      (7<<5)
+#define     R200_TXC_ARG_B_TFACTOR_COLOR       (8<<5)
+#define     R200_TXC_ARG_B_TFACTOR_ALPHA       (9<<5)
+#define     R200_TXC_ARG_B_R0_COLOR            (10<<5)
+#define     R200_TXC_ARG_B_R0_ALPHA            (11<<5)
+#define     R200_TXC_ARG_B_R1_COLOR            (12<<5)
+#define     R200_TXC_ARG_B_R1_ALPHA            (13<<5)
+#define     R200_TXC_ARG_B_R2_COLOR            (14<<5)
+#define     R200_TXC_ARG_B_R2_ALPHA            (15<<5)
+#define     R200_TXC_ARG_B_R3_COLOR            (16<<5)
+#define     R200_TXC_ARG_B_R3_ALPHA            (17<<5)
+#define     R200_TXC_ARG_B_R4_COLOR            (18<<5)
+#define     R200_TXC_ARG_B_R4_ALPHA            (19<<5)
+#define     R200_TXC_ARG_B_R5_COLOR            (20<<5)
+#define     R200_TXC_ARG_B_R5_ALPHA            (21<<5)
+#define     R200_TXC_ARG_B_TFACTOR1_COLOR      (26<<5)
+#define     R200_TXC_ARG_B_TFACTOR1_ALPHA      (27<<5)
+#define     R200_TXC_ARG_B_MASK                        (31 << 5)
+#define     R200_TXC_ARG_B_SHIFT                       5
+#define     R200_TXC_ARG_C_ZERO                (0<<10) /* ARG_C_* = the ARG_A_* selector codes shifted left by 10 */
+#define     R200_TXC_ARG_C_CURRENT_COLOR       (2<<10)
+#define     R200_TXC_ARG_C_CURRENT_ALPHA       (3<<10)
+#define     R200_TXC_ARG_C_DIFFUSE_COLOR       (4<<10)
+#define     R200_TXC_ARG_C_DIFFUSE_ALPHA       (5<<10)
+#define     R200_TXC_ARG_C_SPECULAR_COLOR      (6<<10)
+#define     R200_TXC_ARG_C_SPECULAR_ALPHA      (7<<10)
+#define     R200_TXC_ARG_C_TFACTOR_COLOR       (8<<10)
+#define     R200_TXC_ARG_C_TFACTOR_ALPHA       (9<<10)
+#define     R200_TXC_ARG_C_R0_COLOR            (10<<10)
+#define     R200_TXC_ARG_C_R0_ALPHA            (11<<10)
+#define     R200_TXC_ARG_C_R1_COLOR            (12<<10)
+#define     R200_TXC_ARG_C_R1_ALPHA            (13<<10)
+#define     R200_TXC_ARG_C_R2_COLOR            (14<<10)
+#define     R200_TXC_ARG_C_R2_ALPHA            (15<<10)
+#define     R200_TXC_ARG_C_R3_COLOR            (16<<10)
+#define     R200_TXC_ARG_C_R3_ALPHA            (17<<10)
+#define     R200_TXC_ARG_C_R4_COLOR            (18<<10)
+#define     R200_TXC_ARG_C_R4_ALPHA            (19<<10)
+#define     R200_TXC_ARG_C_R5_COLOR            (20<<10)
+#define     R200_TXC_ARG_C_R5_ALPHA            (21<<10)
+#define     R200_TXC_ARG_C_TFACTOR1_COLOR      (26<<10)
+#define     R200_TXC_ARG_C_TFACTOR1_ALPHA      (27<<10)
+#define     R200_TXC_ARG_C_MASK                        (31 << 10)
+#define     R200_TXC_ARG_C_SHIFT                       10
+#define     R200_TXC_COMP_ARG_A                    (1 << 16) /* modifier bits per argument: COMP, BIAS, SCALE, NEG in bits 16-19 (A), 20-23 (B), 24-27 (C) */
+#define     R200_TXC_COMP_ARG_A_SHIFT              (16)
+#define     R200_TXC_BIAS_ARG_A                    (1 << 17)
+#define     R200_TXC_SCALE_ARG_A                   (1 << 18)
+#define     R200_TXC_NEG_ARG_A                     (1 << 19)
+#define     R200_TXC_COMP_ARG_B                    (1 << 20)
+#define     R200_TXC_COMP_ARG_B_SHIFT              (20)
+#define     R200_TXC_BIAS_ARG_B                    (1 << 21)
+#define     R200_TXC_SCALE_ARG_B                   (1 << 22)
+#define     R200_TXC_NEG_ARG_B                     (1 << 23)
+#define     R200_TXC_COMP_ARG_C                    (1 << 24)
+#define     R200_TXC_COMP_ARG_C_SHIFT              (24)
+#define     R200_TXC_BIAS_ARG_C                    (1 << 25)
+#define     R200_TXC_SCALE_ARG_C                   (1 << 26)
+#define     R200_TXC_NEG_ARG_C                     (1 << 27)
+#define     R200_TXC_OP_MADD                        (0 << 28) /* 3-bit op field in bits 28-30; op code 1 is not defined here */
+#define     R200_TXC_OP_CND0                       (2 << 28)
+#define     R200_TXC_OP_LERP                       (3 << 28)
+#define     R200_TXC_OP_DOT3                       (4 << 28)
+#define     R200_TXC_OP_DOT4                       (5 << 28)
+#define     R200_TXC_OP_CONDITIONAL                (6 << 28)
+#define     R200_TXC_OP_DOT2_ADD                   (7 << 28)
+#define     R200_TXC_OP_MASK                       (7 << 28)
+#define R200_PP_TXCBLEND2_0                0x2f04 /* fields: TFACTOR sel 0-2, TFACTOR1 sel 4-6, scale 8-10, clamp 12-13, output reg 16-18, output mask 20-22, REPL A/B/C 26-31 */
+#define     R200_TXC_TFACTOR_SEL_SHIFT             0
+#define     R200_TXC_TFACTOR_SEL_MASK              0x7
+#define     R200_TXC_TFACTOR1_SEL_SHIFT            4
+#define     R200_TXC_TFACTOR1_SEL_MASK             (0x7 << 4)
+#define     R200_TXC_SCALE_SHIFT                   8
+#define     R200_TXC_SCALE_MASK                    (7 << 8)
+#define     R200_TXC_SCALE_1X                      (0 << 8) /* scale code 4 is not defined here */
+#define     R200_TXC_SCALE_2X                      (1 << 8)
+#define     R200_TXC_SCALE_4X                      (2 << 8)
+#define     R200_TXC_SCALE_8X                      (3 << 8)
+#define     R200_TXC_SCALE_INV2                    (5 << 8)
+#define     R200_TXC_SCALE_INV4                    (6 << 8)
+#define     R200_TXC_SCALE_INV8                    (7 << 8)
+#define     R200_TXC_CLAMP_SHIFT                   12
+#define     R200_TXC_CLAMP_MASK                    (3 << 12)
+#define     R200_TXC_CLAMP_WRAP                    (0 << 12)
+#define     R200_TXC_CLAMP_0_1                     (1 << 12)
+#define     R200_TXC_CLAMP_8_8                     (2 << 12)
+#define     R200_TXC_OUTPUT_REG_MASK               (7 << 16)
+#define     R200_TXC_OUTPUT_REG_NONE               (0 << 16) /* OUTPUT_REG_Rn is encoded as (n + 1) << 16 */
+#define     R200_TXC_OUTPUT_REG_R0                 (1 << 16)
+#define     R200_TXC_OUTPUT_REG_R1                 (2 << 16)
+#define     R200_TXC_OUTPUT_REG_R2                 (3 << 16)
+#define     R200_TXC_OUTPUT_REG_R3                 (4 << 16)
+#define     R200_TXC_OUTPUT_REG_R4                 (5 << 16)
+#define     R200_TXC_OUTPUT_REG_R5                 (6 << 16)
+#define     R200_TXC_OUTPUT_MASK_MASK              (7 << 20)
+#define     R200_TXC_OUTPUT_MASK_RGB               (0 << 20)
+#define     R200_TXC_OUTPUT_MASK_RG                (1 << 20)
+#define     R200_TXC_OUTPUT_MASK_RB                (2 << 20)
+#define     R200_TXC_OUTPUT_MASK_R                 (3 << 20)
+#define     R200_TXC_OUTPUT_MASK_GB                (4 << 20)
+#define     R200_TXC_OUTPUT_MASK_G                 (5 << 20)
+#define     R200_TXC_OUTPUT_MASK_B                 (6 << 20)
+#define     R200_TXC_OUTPUT_MASK_NONE              (7 << 20)
+#define     R200_TXC_REPL_NORMAL                   0 /* unshifted 2-bit codes; presumably placed with the REPL_ARG_x_SHIFT values below -- TODO confirm */
+#define     R200_TXC_REPL_RED                      1
+#define     R200_TXC_REPL_GREEN                    2
+#define     R200_TXC_REPL_BLUE                     3
+#define     R200_TXC_REPL_ARG_A_SHIFT              26
+#define     R200_TXC_REPL_ARG_A_MASK               (3 << 26)
+#define     R200_TXC_REPL_ARG_B_SHIFT              28
+#define     R200_TXC_REPL_ARG_B_MASK               (3 << 28)
+#define     R200_TXC_REPL_ARG_C_SHIFT              30
+#define     R200_TXC_REPL_ARG_C_MASK               (3U << 30) /* unsigned: (3 << 30) overflows a signed int */
+#define R200_PP_TXABLEND_0                0x2f08 /* stage 0; the R200_TXA_* fields listed after this line belong to it */
+#define     R200_TXA_ARG_A_ZERO              (0) /* ARG_A source codes, unshifted: 5-bit field at bit 0 (see R200_TXA_ARG_A_MASK/_SHIFT) */
+#define     R200_TXA_ARG_A_CURRENT_ALPHA     (2) /* guess */
+#define     R200_TXA_ARG_A_CURRENT_BLUE      (3) /* guess */
+#define     R200_TXA_ARG_A_DIFFUSE_ALPHA     (4)
+#define     R200_TXA_ARG_A_DIFFUSE_BLUE      (5)
+#define     R200_TXA_ARG_A_SPECULAR_ALPHA    (6)
+#define     R200_TXA_ARG_A_SPECULAR_BLUE     (7)
+#define     R200_TXA_ARG_A_TFACTOR_ALPHA     (8)
+#define     R200_TXA_ARG_A_TFACTOR_BLUE      (9)
+#define     R200_TXA_ARG_A_R0_ALPHA          (10)
+#define     R200_TXA_ARG_A_R0_BLUE           (11)
+#define     R200_TXA_ARG_A_R1_ALPHA          (12)
+#define     R200_TXA_ARG_A_R1_BLUE           (13)
+#define     R200_TXA_ARG_A_R2_ALPHA          (14)
+#define     R200_TXA_ARG_A_R2_BLUE           (15)
+#define     R200_TXA_ARG_A_R3_ALPHA          (16)
+#define     R200_TXA_ARG_A_R3_BLUE           (17)
+#define     R200_TXA_ARG_A_R4_ALPHA          (18)
+#define     R200_TXA_ARG_A_R4_BLUE           (19)
+#define     R200_TXA_ARG_A_R5_ALPHA          (20)
+#define     R200_TXA_ARG_A_R5_BLUE           (21)
+#define     R200_TXA_ARG_A_TFACTOR1_ALPHA    (26) /* codes 22-25 are not named in this list */
+#define     R200_TXA_ARG_A_TFACTOR1_BLUE     (27)
+#define     R200_TXA_ARG_A_MASK                        (31 << 0)
+#define     R200_TXA_ARG_A_SHIFT                       0
+#define     R200_TXA_ARG_B_ZERO              (0<<5) /* ARG_B source codes: same numbering as ARG_A, pre-shifted into the 5-bit field at bit 5 */
+#define     R200_TXA_ARG_B_CURRENT_ALPHA     (2<<5) /* guess */
+#define     R200_TXA_ARG_B_CURRENT_BLUE      (3<<5) /* guess */
+#define     R200_TXA_ARG_B_DIFFUSE_ALPHA     (4<<5)
+#define     R200_TXA_ARG_B_DIFFUSE_BLUE      (5<<5)
+#define     R200_TXA_ARG_B_SPECULAR_ALPHA    (6<<5)
+#define     R200_TXA_ARG_B_SPECULAR_BLUE     (7<<5)
+#define     R200_TXA_ARG_B_TFACTOR_ALPHA     (8<<5)
+#define     R200_TXA_ARG_B_TFACTOR_BLUE      (9<<5)
+#define     R200_TXA_ARG_B_R0_ALPHA          (10<<5)
+#define     R200_TXA_ARG_B_R0_BLUE           (11<<5)
+#define     R200_TXA_ARG_B_R1_ALPHA          (12<<5)
+#define     R200_TXA_ARG_B_R1_BLUE           (13<<5)
+#define     R200_TXA_ARG_B_R2_ALPHA          (14<<5)
+#define     R200_TXA_ARG_B_R2_BLUE           (15<<5)
+#define     R200_TXA_ARG_B_R3_ALPHA          (16<<5)
+#define     R200_TXA_ARG_B_R3_BLUE           (17<<5)
+#define     R200_TXA_ARG_B_R4_ALPHA          (18<<5)
+#define     R200_TXA_ARG_B_R4_BLUE           (19<<5)
+#define     R200_TXA_ARG_B_R5_ALPHA          (20<<5)
+#define     R200_TXA_ARG_B_R5_BLUE           (21<<5)
+#define     R200_TXA_ARG_B_TFACTOR1_ALPHA    (26<<5)
+#define     R200_TXA_ARG_B_TFACTOR1_BLUE     (27<<5)
+#define     R200_TXA_ARG_B_MASK                        (31 << 5)
+#define     R200_TXA_ARG_B_SHIFT                       5 /* matches the literal <<5 used by the ARG_B codes */
+#define     R200_TXA_ARG_C_ZERO              (0<<10) /* ARG_C source codes: same numbering as ARG_A, pre-shifted into the 5-bit field at bit 10 */
+#define     R200_TXA_ARG_C_CURRENT_ALPHA     (2<<10) /* guess */
+#define     R200_TXA_ARG_C_CURRENT_BLUE      (3<<10) /* guess */
+#define     R200_TXA_ARG_C_DIFFUSE_ALPHA     (4<<10)
+#define     R200_TXA_ARG_C_DIFFUSE_BLUE      (5<<10)
+#define     R200_TXA_ARG_C_SPECULAR_ALPHA    (6<<10)
+#define     R200_TXA_ARG_C_SPECULAR_BLUE     (7<<10)
+#define     R200_TXA_ARG_C_TFACTOR_ALPHA     (8<<10)
+#define     R200_TXA_ARG_C_TFACTOR_BLUE      (9<<10)
+#define     R200_TXA_ARG_C_R0_ALPHA          (10<<10)
+#define     R200_TXA_ARG_C_R0_BLUE           (11<<10)
+#define     R200_TXA_ARG_C_R1_ALPHA          (12<<10)
+#define     R200_TXA_ARG_C_R1_BLUE           (13<<10)
+#define     R200_TXA_ARG_C_R2_ALPHA          (14<<10)
+#define     R200_TXA_ARG_C_R2_BLUE           (15<<10)
+#define     R200_TXA_ARG_C_R3_ALPHA          (16<<10)
+#define     R200_TXA_ARG_C_R3_BLUE           (17<<10)
+#define     R200_TXA_ARG_C_R4_ALPHA          (18<<10)
+#define     R200_TXA_ARG_C_R4_BLUE           (19<<10)
+#define     R200_TXA_ARG_C_R5_ALPHA          (20<<10)
+#define     R200_TXA_ARG_C_R5_BLUE           (21<<10)
+#define     R200_TXA_ARG_C_TFACTOR1_ALPHA    (26<<10)
+#define     R200_TXA_ARG_C_TFACTOR1_BLUE     (27<<10)
+#define     R200_TXA_ARG_C_MASK                        (31 << 10)
+#define     R200_TXA_ARG_C_SHIFT                       10 /* matches the literal <<10 used by the ARG_C codes */
+#define     R200_TXA_COMP_ARG_A                    (1 << 16) /* per-argument modifier flags: COMP/BIAS/SCALE/NEG, four bits per argument starting at bit 16 */
+#define     R200_TXA_COMP_ARG_A_SHIFT              (16)
+#define     R200_TXA_BIAS_ARG_A                    (1 << 17)
+#define     R200_TXA_SCALE_ARG_A                   (1 << 18)
+#define     R200_TXA_NEG_ARG_A                     (1 << 19)
+#define     R200_TXA_COMP_ARG_B                    (1 << 20) /* ARG_B flags occupy bits 20-23 */
+#define     R200_TXA_COMP_ARG_B_SHIFT              (20)
+#define     R200_TXA_BIAS_ARG_B                    (1 << 21)
+#define     R200_TXA_SCALE_ARG_B                   (1 << 22)
+#define     R200_TXA_NEG_ARG_B                     (1 << 23)
+#define     R200_TXA_COMP_ARG_C                    (1 << 24) /* ARG_C flags occupy bits 24-27 */
+#define     R200_TXA_COMP_ARG_C_SHIFT              (24)
+#define     R200_TXA_BIAS_ARG_C                    (1 << 25)
+#define     R200_TXA_SCALE_ARG_C                   (1 << 26)
+#define     R200_TXA_NEG_ARG_C                     (1 << 27)
+#define     R200_TXA_OP_MADD                       (0 << 28) /* operation select: 3-bit field at bits 28-30 (R200_TXA_OP_MASK); only codes 0, 2, 3 and 6 are named */
+#define     R200_TXA_OP_CND0                       (2 << 28)
+#define     R200_TXA_OP_LERP                       (3 << 28)
+#define     R200_TXA_OP_CONDITIONAL                (6 << 28)
+#define     R200_TXA_OP_MASK                       (7 << 28)
+#define R200_PP_TXABLEND2_0                0x2f0c
+/* Factor selects: two 3-bit fields, at bit 0 and at bit 4. */
+#define     R200_TXA_TFACTOR_SEL_SHIFT             0
+#define     R200_TXA_TFACTOR_SEL_MASK              0x7
+#define     R200_TXA_TFACTOR1_SEL_SHIFT            4
+#define     R200_TXA_TFACTOR1_SEL_MASK             (0x7 << 4)
+/* Scale: 3-bit field at bit 8.  Code 4 is not named here. */
+#define     R200_TXA_SCALE_SHIFT                   8
+#define     R200_TXA_SCALE_MASK                    (7 << 8)
+#define     R200_TXA_SCALE_1X                      (0 << 8)
+#define     R200_TXA_SCALE_2X                      (1 << 8)
+#define     R200_TXA_SCALE_4X                      (2 << 8)
+#define     R200_TXA_SCALE_8X                      (3 << 8)
+#define     R200_TXA_SCALE_INV2                    (5 << 8)
+#define     R200_TXA_SCALE_INV4                    (6 << 8)
+#define     R200_TXA_SCALE_INV8                    (7 << 8)
+/* Clamp: 2-bit field at bit 12. */
+#define     R200_TXA_CLAMP_SHIFT                   12
+#define     R200_TXA_CLAMP_MASK                    (3 << 12)
+#define     R200_TXA_CLAMP_WRAP                    (0 << 12)
+#define     R200_TXA_CLAMP_0_1                     (1 << 12)
+#define     R200_TXA_CLAMP_8_8                     (2 << 12)
+/* Output register select: 3-bit field at bit 16. */
+#define     R200_TXA_OUTPUT_REG_MASK               (7 << 16)
+#define     R200_TXA_OUTPUT_REG_NONE               (0 << 16)
+#define     R200_TXA_OUTPUT_REG_R0                 (1 << 16)
+#define     R200_TXA_OUTPUT_REG_R1                 (2 << 16)
+#define     R200_TXA_OUTPUT_REG_R2                 (3 << 16)
+#define     R200_TXA_OUTPUT_REG_R3                 (4 << 16)
+#define     R200_TXA_OUTPUT_REG_R4                 (5 << 16)
+#define     R200_TXA_OUTPUT_REG_R5                 (6 << 16)
+#define     R200_TXA_DOT_ALPHA                     (1 << 20)
+/* Replicate codes, unshifted.  NOTE(review): presumably these are the
+ * values for the 2-bit REPL_ARG_x fields below -- confirm.  Unlike the
+ * R200_TXC_REPL_* set, no BLUE code is defined here.
+ */
+#define     R200_TXA_REPL_NORMAL                   0
+#define     R200_TXA_REPL_RED                      1
+#define     R200_TXA_REPL_GREEN                    2
+#define     R200_TXA_REPL_ARG_A_SHIFT              26
+#define     R200_TXA_REPL_ARG_A_MASK               (3 << 26)
+#define     R200_TXA_REPL_ARG_B_SHIFT              28
+#define     R200_TXA_REPL_ARG_B_MASK               (3 << 28)
+#define     R200_TXA_REPL_ARG_C_SHIFT              30
+/* 3U, not 3: 3 << 30 does not fit in a signed 32-bit int, so the
+ * signed shift would be undefined behaviour in C.
+ */
+#define     R200_TXA_REPL_ARG_C_MASK               (3U << 30)
+#define R200_PP_TXCBLEND_1                0x2f10 /* stages 1-7: one group of four registers (TXCBLEND, TXCBLEND2, TXABLEND, TXABLEND2) per stage, groups 0x10 apart */
+#define R200_PP_TXCBLEND2_1               0x2f14
+#define R200_PP_TXABLEND_1                0x2f18
+#define R200_PP_TXABLEND2_1               0x2f1c
+#define R200_PP_TXCBLEND_2                0x2f20
+#define R200_PP_TXCBLEND2_2               0x2f24
+#define R200_PP_TXABLEND_2                0x2f28
+#define R200_PP_TXABLEND2_2               0x2f2c
+#define R200_PP_TXCBLEND_3                0x2f30
+#define R200_PP_TXCBLEND2_3               0x2f34
+#define R200_PP_TXABLEND_3                0x2f38
+#define R200_PP_TXABLEND2_3               0x2f3c
+#define R200_PP_TXCBLEND_4                0x2f40
+#define R200_PP_TXCBLEND2_4               0x2f44
+#define R200_PP_TXABLEND_4                0x2f48
+#define R200_PP_TXABLEND2_4               0x2f4c
+#define R200_PP_TXCBLEND_5                0x2f50
+#define R200_PP_TXCBLEND2_5               0x2f54
+#define R200_PP_TXABLEND_5                0x2f58
+#define R200_PP_TXABLEND2_5               0x2f5c
+#define R200_PP_TXCBLEND_6                0x2f60
+#define R200_PP_TXCBLEND2_6               0x2f64
+#define R200_PP_TXABLEND_6                0x2f68
+#define R200_PP_TXABLEND2_6               0x2f6c
+#define R200_PP_TXCBLEND_7                0x2f70
+#define R200_PP_TXCBLEND2_7               0x2f74
+#define R200_PP_TXABLEND_7                0x2f78
+#define R200_PP_TXABLEND2_7               0x2f7c
+/* gap: the next register defined here is at 0x321C */
+#define R200_RB3D_ABLENDCNTL               0x321C /* see BLENDCTL */
+#define R200_RB3D_CBLENDCNTL               0x3220 /* see BLENDCTL */
+
+
+/*
+ * Offsets in TCL vector state.  NOTE: Hardwiring matrix positions.
+ * Multiple contexts could collaborate to eliminate state bouncing.
+ *
+ * The R200_VS_MATRIX_n_* slots are 4 apart and are listed in the same
+ * order as the R200_MTX_* matrix indices (MV, inverse MV, MVP,
+ * TEX0..TEX5).  The two material groups (MAT_0, MAT_1) each hold
+ * EMISS/AMB/DIF/SPEC at consecutive offsets.
+ */
+#define R200_VS_LIGHT_AMBIENT_ADDR          0x00000028
+#define R200_VS_LIGHT_DIFFUSE_ADDR          0x00000030
+#define R200_VS_LIGHT_SPECULAR_ADDR         0x00000038
+#define R200_VS_LIGHT_DIRPOS_ADDR           0x00000040
+#define R200_VS_LIGHT_HWVSPOT_ADDR          0x00000048
+#define R200_VS_LIGHT_ATTENUATION_ADDR      0x00000050
+#define R200_VS_SPOT_DUAL_CONE              0x00000058
+#define R200_VS_GLOBAL_AMBIENT_ADDR         0x0000005C
+#define R200_VS_FOG_PARAM_ADDR              0x0000005D
+#define R200_VS_EYE_VECTOR_ADDR             0x0000005E
+#define R200_VS_UCP_ADDR                    0x00000060
+#define R200_VS_PNT_SPRITE_VPORT_SCALE      0x00000068
+#define R200_VS_MATRIX_0_MV                 0x00000080
+#define R200_VS_MATRIX_1_INV_MV                    0x00000084
+#define R200_VS_MATRIX_2_MVP               0x00000088
+#define R200_VS_MATRIX_3_TEX0              0x0000008C
+#define R200_VS_MATRIX_4_TEX1              0x00000090
+#define R200_VS_MATRIX_5_TEX2              0x00000094
+#define R200_VS_MATRIX_6_TEX3              0x00000098
+#define R200_VS_MATRIX_7_TEX4              0x0000009C
+#define R200_VS_MATRIX_8_TEX5              0x000000A0
+#define R200_VS_MAT_0_EMISS                 0x000000B0
+#define R200_VS_MAT_0_AMB                   0x000000B1
+#define R200_VS_MAT_0_DIF                   0x000000B2
+#define R200_VS_MAT_0_SPEC                  0x000000B3
+#define R200_VS_MAT_1_EMISS                 0x000000B4
+#define R200_VS_MAT_1_AMB                   0x000000B5
+#define R200_VS_MAT_1_DIF                   0x000000B6
+#define R200_VS_MAT_1_SPEC                  0x000000B7
+#define R200_VS_EYE2CLIP_MTX                0x000000B8
+#define R200_VS_PNT_SPRITE_ATT_CONST        0x000000BC
+#define R200_VS_PNT_SPRITE_EYE_IN_MODEL     0x000000BD
+#define R200_VS_PNT_SPRITE_CLAMP            0x000000BE
+#define R200_VS_MAX                         0x000001C0
+
+
+/*
+ * Offsets in TCL scalar state
+ *
+ * The R200_SS_LIGHT_* groups start 8 apart (0x00 .. 0x30).
+ * NOTE(review): presumably one entry per hardware light within each
+ * group -- confirm against the code that uploads them.
+ * The guard-band adjust values sit at 0x80-0x83 and the two material
+ * shininess values at 0x100 and 0x101.
+ */
+#define R200_SS_LIGHT_DCD_ADDR              0x00000000
+#define R200_SS_LIGHT_DCM_ADDR              0x00000008
+#define R200_SS_LIGHT_SPOT_EXPONENT_ADDR    0x00000010
+#define R200_SS_LIGHT_SPOT_CUTOFF_ADDR      0x00000018
+#define R200_SS_LIGHT_SPECULAR_THRESH_ADDR  0x00000020
+#define R200_SS_LIGHT_RANGE_CUTOFF_SQRD     0x00000028
+#define R200_SS_LIGHT_RANGE_ATT_CONST       0x00000030
+#define R200_SS_VERT_GUARD_CLIP_ADJ_ADDR    0x00000080
+#define R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR 0x00000081
+#define R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR    0x00000082
+#define R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR 0x00000083
+#define R200_SS_MAT_0_SHININESS             0x00000100
+#define R200_SS_MAT_1_SHININESS             0x00000101
+
+
+/*
+ * Matrix indices
+ *
+ * Each value equals the n in the corresponding R200_VS_MATRIX_n_*
+ * vector-state offset name (0 = MV, 1 = inverse MV, 2 = MVP,
+ * 3..8 = TEX0..TEX5).
+ */
+#define R200_MTX_MV                        0
+#define R200_MTX_IMV                       1
+#define R200_MTX_MVP                       2
+#define R200_MTX_TEX0                      3
+#define R200_MTX_TEX1                      4
+#define R200_MTX_TEX2                      5
+#define R200_MTX_TEX3                      6
+#define R200_MTX_TEX4                      7
+#define R200_MTX_TEX5                      8
+
+/* Color formats for 2d packets
+ *
+ * Raw format codes; the numbering is not contiguous and only the
+ * codes listed here are named.
+ */
+#define R200_CP_COLOR_FORMAT_CI8       2
+#define R200_CP_COLOR_FORMAT_ARGB1555  3
+#define R200_CP_COLOR_FORMAT_RGB565    4
+#define R200_CP_COLOR_FORMAT_ARGB8888  6
+#define R200_CP_COLOR_FORMAT_RGB332    7
+#define R200_CP_COLOR_FORMAT_RGB8      9
+#define R200_CP_COLOR_FORMAT_ARGB4444  15
+
+
+/*
+ * CP type-3 packets
+ *
+ * Every value below has 0xC000 in its upper 16 bits, a distinct byte
+ * in bits 15:8 and a zero low byte.
+ * NOTE(review): presumably these are header templates that callers
+ * combine with a count field -- confirm against the emit code.
+ */
+#define R200_CP_CMD_NOP                 0xC0001000
+#define R200_CP_CMD_NEXT_CHAR           0xC0001900
+#define R200_CP_CMD_PLY_NEXTSCAN        0xC0001D00
+#define R200_CP_CMD_SET_SCISSORS        0xC0001E00
+#define R200_CP_CMD_LOAD_MICROCODE      0xC0002400
+#define R200_CP_CMD_WAIT_FOR_IDLE       0xC0002600
+#define R200_CP_CMD_3D_DRAW_VBUF        0xC0002800
+#define R200_CP_CMD_3D_DRAW_IMMD        0xC0002900
+#define R200_CP_CMD_3D_DRAW_INDX        0xC0002A00
+#define R200_CP_CMD_LOAD_PALETTE        0xC0002C00
+#define R200_CP_CMD_3D_LOAD_VBPNTR      0xC0002F00
+#define R200_CP_CMD_INDX_BUFFER         0xC0003300
+#define R200_CP_CMD_3D_DRAW_VBUF_2      0xC0003400
+#define R200_CP_CMD_3D_DRAW_IMMD_2      0xC0003500
+#define R200_CP_CMD_3D_DRAW_INDX_2      0xC0003600
+#define R200_CP_CMD_PAINT              0xC0009100
+#define R200_CP_CMD_BITBLT             0xC0009200
+#define R200_CP_CMD_SMALLTEXT          0xC0009300
+#define R200_CP_CMD_HOSTDATA_BLT       0xC0009400
+#define R200_CP_CMD_POLYLINE           0xC0009500
+#define R200_CP_CMD_POLYSCANLINES      0xC0009800
+#define R200_CP_CMD_PAINT_MULTI                0xC0009A00
+#define R200_CP_CMD_BITBLT_MULTI       0xC0009B00
+#define R200_CP_CMD_TRANS_BITBLT       0xC0009C00
+
+
+#define R200_AGP_TEX_OFFSET               0x02000000 /* NOTE(review): presumably the base offset used for AGP-resident textures -- confirm against users */
+
+
+
+
+#endif
+
diff --git a/src/mesa/drivers/dri/r200/r200_sanity.c b/src/mesa/drivers/dri/r200/r200_sanity.c
new file mode 100644 (file)
index 0000000..11dd36a
--- /dev/null
@@ -0,0 +1,1325 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc, Cedar Park, TX.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+#include <errno.h>
+
+#include "glheader.h"
+#include "imports.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_sanity.h"
+#include "radeon_reg.h"
+#include "r200_reg.h"
+
+/* Set this '1' to get more verbiage.
+ *
+ * With MORE_VERBOSE non-zero, NORMAL is always true and VERBOSE
+ * follows the DEBUG_VERBOSE bit of R200_DEBUG.  With it zero, VERBOSE
+ * is always false and NORMAL follows that bit instead.
+ */
+#define MORE_VERBOSE 1
+
+#if MORE_VERBOSE
+#define VERBOSE (R200_DEBUG & DEBUG_VERBOSE)
+#define NORMAL  (1)
+#else
+#define VERBOSE 0
+#define NORMAL  (R200_DEBUG & DEBUG_VERBOSE)
+#endif
+
+
+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.
+ *
+ * One entry per state packet: the register the packet starts at, its
+ * length, and a printable name.  The trailing index comments on the
+ * cubic-map entries give the entry's position in this table.
+ * NOTE(review): presumably the table is indexed by the packet id
+ * carried in the command buffer and 'len' counts consecutive
+ * registers -- confirm against the code that walks it.
+ *
+ * Every name is the register name of 'start', so debug output is
+ * uniform across all entries.
+ */
+static struct {
+   int start;          /* first register written by the packet */
+   int len;            /* length of the packet's register run */
+   const char *name;   /* printable name of 'start' */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+   { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+   { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+   { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+   { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+   { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+   { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+   { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+   { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+   { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+   { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
+   { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
+   { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
+   { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
+   { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
+   { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
+   { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
+   { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
+   { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
+   { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
+   { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
+   { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
+   { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
+   { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
+   { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
+   { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
+   { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
+   { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
+   { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
+   { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
+   { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
+   { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
+   { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
+   { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
+   { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
+   { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
+   { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
+   { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
+   { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
+   { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
+   { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },
+   { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },
+   { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },
+   { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },
+   { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },
+   { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },
+   { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },
+   { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },
+   { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
+   { R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
+   { R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
+   { R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
+   { R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
+   { R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
+   { R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
+   { R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
+   { R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
+   { R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
+   { R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
+   { R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
+   { R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
+};
+
+struct reg_names { /* pairs a register with its printable name */
+   int idx; /* register value, e.g. R200_PP_MISC in the reg_names[] initializers */
+   const char *name; /* the same register spelled as a string */
+};
+
+static struct reg_names reg_names[] = {
+   { R200_PP_MISC, "R200_PP_MISC" },
+   { R200_PP_FOG_COLOR, "R200_PP_FOG_COLOR" },
+   { R200_RE_SOLID_COLOR, "R200_RE_SOLID_COLOR" },
+   { R200_RB3D_BLENDCNTL, "R200_RB3D_BLENDCNTL" },
+   { R200_RB3D_DEPTHOFFSET, "R200_RB3D_DEPTHOFFSET" },
+   { R200_RB3D_DEPTHPITCH, "R200_RB3D_DEPTHPITCH" },
+   { R200_RB3D_ZSTENCILCNTL, "R200_RB3D_ZSTENCILCNTL" },
+   { R200_PP_CNTL, "R200_PP_CNTL" },
+   { R200_RB3D_CNTL, "R200_RB3D_CNTL" },
+   { R200_RB3D_COLOROFFSET, "R200_RB3D_COLOROFFSET" },
+   { R200_RE_WIDTH_HEIGHT, "R200_RE_WIDTH_HEIGHT" },
+   { R200_RB3D_COLORPITCH, "R200_RB3D_COLORPITCH" },
+   { R200_SE_CNTL, "R200_SE_CNTL" },
+   { R200_RE_CNTL, "R200_RE_CNTL" },
+   { R200_RE_MISC, "R200_RE_MISC" },
+   { R200_RE_STIPPLE_ADDR, "R200_RE_STIPPLE_ADDR" },
+   { R200_RE_STIPPLE_DATA, "R200_RE_STIPPLE_DATA" },
+   { R200_RE_LINE_PATTERN, "R200_RE_LINE_PATTERN" },
+   { R200_RE_LINE_STATE, "R200_RE_LINE_STATE" },
+   { R200_RE_SCISSOR_TL_0, "R200_RE_SCISSOR_TL_0" },
+   { R200_RE_SCISSOR_BR_0, "R200_RE_SCISSOR_BR_0" },
+   { R200_RE_SCISSOR_TL_1, "R200_RE_SCISSOR_TL_1" },
+   { R200_RE_SCISSOR_BR_1, "R200_RE_SCISSOR_BR_1" },
+   { R200_RE_SCISSOR_TL_2, "R200_RE_SCISSOR_TL_2" },
+   { R200_RE_SCISSOR_BR_2, "R200_RE_SCISSOR_BR_2" },
+   { R200_RB3D_DEPTHXY_OFFSET, "R200_RB3D_DEPTHXY_OFFSET" },
+   { R200_RB3D_STENCILREFMASK, "R200_RB3D_STENCILREFMASK" },
+   { R200_RB3D_ROPCNTL, "R200_RB3D_ROPCNTL" },
+   { R200_RB3D_PLANEMASK, "R200_RB3D_PLANEMASK" },
+   { R200_SE_VPORT_XSCALE, "R200_SE_VPORT_XSCALE" },
+   { R200_SE_VPORT_XOFFSET, "R200_SE_VPORT_XOFFSET" },
+   { R200_SE_VPORT_YSCALE, "R200_SE_VPORT_YSCALE" },
+   { R200_SE_VPORT_YOFFSET, "R200_SE_VPORT_YOFFSET" },
+   { R200_SE_VPORT_ZSCALE, "R200_SE_VPORT_ZSCALE" },
+   { R200_SE_VPORT_ZOFFSET, "R200_SE_VPORT_ZOFFSET" },
+   { R200_SE_ZBIAS_FACTOR, "R200_SE_ZBIAS_FACTOR" },
+   { R200_SE_ZBIAS_CONSTANT, "R200_SE_ZBIAS_CONSTANT" },
+   { R200_SE_LINE_WIDTH, "R200_SE_LINE_WIDTH" },
+   { R200_SE_VAP_CNTL, "R200_SE_VAP_CNTL" },
+   { R200_SE_VF_CNTL, "R200_SE_VF_CNTL" },
+   { R200_SE_VTX_FMT_0, "R200_SE_VTX_FMT_0" },
+   { R200_SE_VTX_FMT_1, "R200_SE_VTX_FMT_1" },
+   { R200_SE_TCL_OUTPUT_VTX_FMT_0, "R200_SE_TCL_OUTPUT_VTX_FMT_0" },
+   { R200_SE_TCL_OUTPUT_VTX_FMT_1, "R200_SE_TCL_OUTPUT_VTX_FMT_1" },
+   { R200_SE_VTE_CNTL, "R200_SE_VTE_CNTL" },
+   { R200_SE_VTX_NUM_ARRAYS, "R200_SE_VTX_NUM_ARRAYS" },
+   { R200_SE_VTX_AOS_ATTR01, "R200_SE_VTX_AOS_ATTR01" },
+   { R200_SE_VTX_AOS_ADDR0, "R200_SE_VTX_AOS_ADDR0" },
+   { R200_SE_VTX_AOS_ADDR1, "R200_SE_VTX_AOS_ADDR1" },
+   { R200_SE_VTX_AOS_ATTR23, "R200_SE_VTX_AOS_ATTR23" },
+   { R200_SE_VTX_AOS_ADDR2, "R200_SE_VTX_AOS_ADDR2" },
+   { R200_SE_VTX_AOS_ADDR3, "R200_SE_VTX_AOS_ADDR3" },
+   { R200_SE_VTX_AOS_ATTR45, "R200_SE_VTX_AOS_ATTR45" },
+   { R200_SE_VTX_AOS_ADDR4, "R200_SE_VTX_AOS_ADDR4" },
+   { R200_SE_VTX_AOS_ADDR5, "R200_SE_VTX_AOS_ADDR5" },
+   { R200_SE_VTX_AOS_ATTR67, "R200_SE_VTX_AOS_ATTR67" },
+   { R200_SE_VTX_AOS_ADDR6, "R200_SE_VTX_AOS_ADDR6" },
+   { R200_SE_VTX_AOS_ADDR7, "R200_SE_VTX_AOS_ADDR7" },
+   { R200_SE_VTX_AOS_ATTR89, "R200_SE_VTX_AOS_ATTR89" },
+   { R200_SE_VTX_AOS_ADDR8, "R200_SE_VTX_AOS_ADDR8" },
+   { R200_SE_VTX_AOS_ADDR9, "R200_SE_VTX_AOS_ADDR9" },
+   { R200_SE_VTX_AOS_ATTR1011, "R200_SE_VTX_AOS_ATTR1011" },
+   { R200_SE_VTX_AOS_ADDR10, "R200_SE_VTX_AOS_ADDR10" },
+   { R200_SE_VTX_AOS_ADDR11, "R200_SE_VTX_AOS_ADDR11" },
+   { R200_SE_VF_MAX_VTX_INDX, "R200_SE_VF_MAX_VTX_INDX" },
+   { R200_SE_VF_MIN_VTX_INDX, "R200_SE_VF_MIN_VTX_INDX" },
+   { R200_SE_VTX_STATE_CNTL, "R200_SE_VTX_STATE_CNTL" },
+   { R200_SE_TCL_VECTOR_INDX_REG, "R200_SE_TCL_VECTOR_INDX_REG" },
+   { R200_SE_TCL_VECTOR_DATA_REG, "R200_SE_TCL_VECTOR_DATA_REG" },
+   { R200_SE_TCL_SCALAR_INDX_REG, "R200_SE_TCL_SCALAR_INDX_REG" },
+   { R200_SE_TCL_SCALAR_DATA_REG, "R200_SE_TCL_SCALAR_DATA_REG" },
+   { R200_SE_TCL_MATRIX_SEL_0, "R200_SE_TCL_MATRIX_SEL_0" },
+   { R200_SE_TCL_MATRIX_SEL_1, "R200_SE_TCL_MATRIX_SEL_1" },
+   { R200_SE_TCL_MATRIX_SEL_2, "R200_SE_TCL_MATRIX_SEL_2" },
+   { R200_SE_TCL_MATRIX_SEL_3, "R200_SE_TCL_MATRIX_SEL_3" },
+   { R200_SE_TCL_MATRIX_SEL_4, "R200_SE_TCL_MATRIX_SEL_4" },
+   { R200_SE_TCL_LIGHT_MODEL_CTL_0, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
+   { R200_SE_TCL_LIGHT_MODEL_CTL_1, "R200_SE_TCL_LIGHT_MODEL_CTL_1" },
+   { R200_SE_TCL_PER_LIGHT_CTL_0, "R200_SE_TCL_PER_LIGHT_CTL_0" },
+   { R200_SE_TCL_PER_LIGHT_CTL_1, "R200_SE_TCL_PER_LIGHT_CTL_1" },
+   { R200_SE_TCL_PER_LIGHT_CTL_2, "R200_SE_TCL_PER_LIGHT_CTL_2" },
+   { R200_SE_TCL_PER_LIGHT_CTL_3, "R200_SE_TCL_PER_LIGHT_CTL_3" },
+   { R200_SE_TCL_TEX_PROC_CTL_2, "R200_SE_TCL_TEX_PROC_CTL_2" },
+   { R200_SE_TCL_TEX_PROC_CTL_3, "R200_SE_TCL_TEX_PROC_CTL_3" },
+   { R200_SE_TCL_TEX_PROC_CTL_0, "R200_SE_TCL_TEX_PROC_CTL_0" },
+   { R200_SE_TCL_TEX_PROC_CTL_1, "R200_SE_TCL_TEX_PROC_CTL_1" },
+   { R200_SE_TC_TEX_CYL_WRAP_CTL, "R200_SE_TC_TEX_CYL_WRAP_CTL" },
+   { R200_SE_TCL_UCP_VERT_BLEND_CTL, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
+   { R200_SE_TCL_POINT_SPRITE_CNTL, "R200_SE_TCL_POINT_SPRITE_CNTL" },
+   { R200_SE_VTX_ST_POS_0_X_4, "R200_SE_VTX_ST_POS_0_X_4" },
+   { R200_SE_VTX_ST_POS_0_Y_4, "R200_SE_VTX_ST_POS_0_Y_4" },
+   { R200_SE_VTX_ST_POS_0_Z_4, "R200_SE_VTX_ST_POS_0_Z_4" },
+   { R200_SE_VTX_ST_POS_0_W_4, "R200_SE_VTX_ST_POS_0_W_4" },
+   { R200_SE_VTX_ST_NORM_0_X, "R200_SE_VTX_ST_NORM_0_X" },
+   { R200_SE_VTX_ST_NORM_0_Y, "R200_SE_VTX_ST_NORM_0_Y" },
+   { R200_SE_VTX_ST_NORM_0_Z, "R200_SE_VTX_ST_NORM_0_Z" },
+   { R200_SE_VTX_ST_PVMS, "R200_SE_VTX_ST_PVMS" },
+   { R200_SE_VTX_ST_CLR_0_R, "R200_SE_VTX_ST_CLR_0_R" },
+   { R200_SE_VTX_ST_CLR_0_G, "R200_SE_VTX_ST_CLR_0_G" },
+   { R200_SE_VTX_ST_CLR_0_B, "R200_SE_VTX_ST_CLR_0_B" },
+   { R200_SE_VTX_ST_CLR_0_A, "R200_SE_VTX_ST_CLR_0_A" },
+   { R200_SE_VTX_ST_CLR_1_R, "R200_SE_VTX_ST_CLR_1_R" },
+   { R200_SE_VTX_ST_CLR_1_G, "R200_SE_VTX_ST_CLR_1_G" },
+   { R200_SE_VTX_ST_CLR_1_B, "R200_SE_VTX_ST_CLR_1_B" },
+   { R200_SE_VTX_ST_CLR_1_A, "R200_SE_VTX_ST_CLR_1_A" },
+   { R200_SE_VTX_ST_CLR_2_R, "R200_SE_VTX_ST_CLR_2_R" },
+   { R200_SE_VTX_ST_CLR_2_G, "R200_SE_VTX_ST_CLR_2_G" },
+   { R200_SE_VTX_ST_CLR_2_B, "R200_SE_VTX_ST_CLR_2_B" },
+   { R200_SE_VTX_ST_CLR_2_A, "R200_SE_VTX_ST_CLR_2_A" },
+   { R200_SE_VTX_ST_CLR_3_R, "R200_SE_VTX_ST_CLR_3_R" },
+   { R200_SE_VTX_ST_CLR_3_G, "R200_SE_VTX_ST_CLR_3_G" },
+   { R200_SE_VTX_ST_CLR_3_B, "R200_SE_VTX_ST_CLR_3_B" },
+   { R200_SE_VTX_ST_CLR_3_A, "R200_SE_VTX_ST_CLR_3_A" },
+   { R200_SE_VTX_ST_CLR_4_R, "R200_SE_VTX_ST_CLR_4_R" },
+   { R200_SE_VTX_ST_CLR_4_G, "R200_SE_VTX_ST_CLR_4_G" },
+   { R200_SE_VTX_ST_CLR_4_B, "R200_SE_VTX_ST_CLR_4_B" },
+   { R200_SE_VTX_ST_CLR_4_A, "R200_SE_VTX_ST_CLR_4_A" },
+   { R200_SE_VTX_ST_CLR_5_R, "R200_SE_VTX_ST_CLR_5_R" },
+   { R200_SE_VTX_ST_CLR_5_G, "R200_SE_VTX_ST_CLR_5_G" },
+   { R200_SE_VTX_ST_CLR_5_B, "R200_SE_VTX_ST_CLR_5_B" },
+   { R200_SE_VTX_ST_CLR_5_A, "R200_SE_VTX_ST_CLR_5_A" },
+   { R200_SE_VTX_ST_CLR_6_R, "R200_SE_VTX_ST_CLR_6_R" },
+   { R200_SE_VTX_ST_CLR_6_G, "R200_SE_VTX_ST_CLR_6_G" },
+   { R200_SE_VTX_ST_CLR_6_B, "R200_SE_VTX_ST_CLR_6_B" },
+   { R200_SE_VTX_ST_CLR_6_A, "R200_SE_VTX_ST_CLR_6_A" },
+   { R200_SE_VTX_ST_CLR_7_R, "R200_SE_VTX_ST_CLR_7_R" },
+   { R200_SE_VTX_ST_CLR_7_G, "R200_SE_VTX_ST_CLR_7_G" },
+   { R200_SE_VTX_ST_CLR_7_B, "R200_SE_VTX_ST_CLR_7_B" },
+   { R200_SE_VTX_ST_CLR_7_A, "R200_SE_VTX_ST_CLR_7_A" },
+   { R200_SE_VTX_ST_TEX_0_S, "R200_SE_VTX_ST_TEX_0_S" },
+   { R200_SE_VTX_ST_TEX_0_T, "R200_SE_VTX_ST_TEX_0_T" },
+   { R200_SE_VTX_ST_TEX_0_R, "R200_SE_VTX_ST_TEX_0_R" },
+   { R200_SE_VTX_ST_TEX_0_Q, "R200_SE_VTX_ST_TEX_0_Q" },
+   { R200_SE_VTX_ST_TEX_1_S, "R200_SE_VTX_ST_TEX_1_S" },
+   { R200_SE_VTX_ST_TEX_1_T, "R200_SE_VTX_ST_TEX_1_T" },
+   { R200_SE_VTX_ST_TEX_1_R, "R200_SE_VTX_ST_TEX_1_R" },
+   { R200_SE_VTX_ST_TEX_1_Q, "R200_SE_VTX_ST_TEX_1_Q" },
+   { R200_SE_VTX_ST_TEX_2_S, "R200_SE_VTX_ST_TEX_2_S" },
+   { R200_SE_VTX_ST_TEX_2_T, "R200_SE_VTX_ST_TEX_2_T" },
+   { R200_SE_VTX_ST_TEX_2_R, "R200_SE_VTX_ST_TEX_2_R" },
+   { R200_SE_VTX_ST_TEX_2_Q, "R200_SE_VTX_ST_TEX_2_Q" },
+   { R200_SE_VTX_ST_TEX_3_S, "R200_SE_VTX_ST_TEX_3_S" },
+   { R200_SE_VTX_ST_TEX_3_T, "R200_SE_VTX_ST_TEX_3_T" },
+   { R200_SE_VTX_ST_TEX_3_R, "R200_SE_VTX_ST_TEX_3_R" },
+   { R200_SE_VTX_ST_TEX_3_Q, "R200_SE_VTX_ST_TEX_3_Q" },
+   { R200_SE_VTX_ST_TEX_4_S, "R200_SE_VTX_ST_TEX_4_S" },
+   { R200_SE_VTX_ST_TEX_4_T, "R200_SE_VTX_ST_TEX_4_T" },
+   { R200_SE_VTX_ST_TEX_4_R, "R200_SE_VTX_ST_TEX_4_R" },
+   { R200_SE_VTX_ST_TEX_4_Q, "R200_SE_VTX_ST_TEX_4_Q" },
+   { R200_SE_VTX_ST_TEX_5_S, "R200_SE_VTX_ST_TEX_5_S" },
+   { R200_SE_VTX_ST_TEX_5_T, "R200_SE_VTX_ST_TEX_5_T" },
+   { R200_SE_VTX_ST_TEX_5_R, "R200_SE_VTX_ST_TEX_5_R" },
+   { R200_SE_VTX_ST_TEX_5_Q, "R200_SE_VTX_ST_TEX_5_Q" },
+   { R200_SE_VTX_ST_PNT_SPRT_SZ, "R200_SE_VTX_ST_PNT_SPRT_SZ" },
+   { R200_SE_VTX_ST_DISC_FOG, "R200_SE_VTX_ST_DISC_FOG" },
+   { R200_SE_VTX_ST_SHININESS_0, "R200_SE_VTX_ST_SHININESS_0" },
+   { R200_SE_VTX_ST_SHININESS_1, "R200_SE_VTX_ST_SHININESS_1" },
+   { R200_SE_VTX_ST_BLND_WT_0, "R200_SE_VTX_ST_BLND_WT_0" },
+   { R200_SE_VTX_ST_BLND_WT_1, "R200_SE_VTX_ST_BLND_WT_1" },
+   { R200_SE_VTX_ST_BLND_WT_2, "R200_SE_VTX_ST_BLND_WT_2" },
+   { R200_SE_VTX_ST_BLND_WT_3, "R200_SE_VTX_ST_BLND_WT_3" },
+   { R200_SE_VTX_ST_POS_1_X, "R200_SE_VTX_ST_POS_1_X" },
+   { R200_SE_VTX_ST_POS_1_Y, "R200_SE_VTX_ST_POS_1_Y" },
+   { R200_SE_VTX_ST_POS_1_Z, "R200_SE_VTX_ST_POS_1_Z" },
+   { R200_SE_VTX_ST_POS_1_W, "R200_SE_VTX_ST_POS_1_W" },
+   { R200_SE_VTX_ST_NORM_1_X, "R200_SE_VTX_ST_NORM_1_X" },
+   { R200_SE_VTX_ST_NORM_1_Y, "R200_SE_VTX_ST_NORM_1_Y" },
+   { R200_SE_VTX_ST_NORM_1_Z, "R200_SE_VTX_ST_NORM_1_Z" },
+   { R200_SE_VTX_ST_USR_CLR_0_R, "R200_SE_VTX_ST_USR_CLR_0_R" },
+   { R200_SE_VTX_ST_USR_CLR_0_G, "R200_SE_VTX_ST_USR_CLR_0_G" },
+   { R200_SE_VTX_ST_USR_CLR_0_B, "R200_SE_VTX_ST_USR_CLR_0_B" },
+   { R200_SE_VTX_ST_USR_CLR_0_A, "R200_SE_VTX_ST_USR_CLR_0_A" },
+   { R200_SE_VTX_ST_USR_CLR_1_R, "R200_SE_VTX_ST_USR_CLR_1_R" },
+   { R200_SE_VTX_ST_USR_CLR_1_G, "R200_SE_VTX_ST_USR_CLR_1_G" },
+   { R200_SE_VTX_ST_USR_CLR_1_B, "R200_SE_VTX_ST_USR_CLR_1_B" },
+   { R200_SE_VTX_ST_USR_CLR_1_A, "R200_SE_VTX_ST_USR_CLR_1_A" },
+   { R200_SE_VTX_ST_CLR_0_PKD, "R200_SE_VTX_ST_CLR_0_PKD" },
+   { R200_SE_VTX_ST_CLR_1_PKD, "R200_SE_VTX_ST_CLR_1_PKD" },
+   { R200_SE_VTX_ST_CLR_2_PKD, "R200_SE_VTX_ST_CLR_2_PKD" },
+   { R200_SE_VTX_ST_CLR_3_PKD, "R200_SE_VTX_ST_CLR_3_PKD" },
+   { R200_SE_VTX_ST_CLR_4_PKD, "R200_SE_VTX_ST_CLR_4_PKD" },
+   { R200_SE_VTX_ST_CLR_5_PKD, "R200_SE_VTX_ST_CLR_5_PKD" },
+   { R200_SE_VTX_ST_CLR_6_PKD, "R200_SE_VTX_ST_CLR_6_PKD" },
+   { R200_SE_VTX_ST_CLR_7_PKD, "R200_SE_VTX_ST_CLR_7_PKD" },
+   { R200_SE_VTX_ST_POS_0_X_2, "R200_SE_VTX_ST_POS_0_X_2" },
+   { R200_SE_VTX_ST_POS_0_Y_2, "R200_SE_VTX_ST_POS_0_Y_2" },
+   { R200_SE_VTX_ST_PAR_CLR_LD, "R200_SE_VTX_ST_PAR_CLR_LD" },
+   { R200_SE_VTX_ST_USR_CLR_PKD, "R200_SE_VTX_ST_USR_CLR_PKD" },
+   { R200_SE_VTX_ST_POS_0_X_3, "R200_SE_VTX_ST_POS_0_X_3" },
+   { R200_SE_VTX_ST_POS_0_Y_3, "R200_SE_VTX_ST_POS_0_Y_3" },
+   { R200_SE_VTX_ST_POS_0_Z_3, "R200_SE_VTX_ST_POS_0_Z_3" },
+   { R200_SE_VTX_ST_END_OF_PKT, "R200_SE_VTX_ST_END_OF_PKT" },
+   { R200_RE_POINTSIZE, "R200_RE_POINTSIZE" },
+   { R200_RE_TOP_LEFT, "R200_RE_TOP_LEFT" },
+   { R200_RE_AUX_SCISSOR_CNTL, "R200_RE_AUX_SCISSOR_CNTL" },
+   { R200_PP_TXFILTER_0, "R200_PP_TXFILTER_0" },
+   { R200_PP_TXFORMAT_0, "R200_PP_TXFORMAT_0" },
+   { R200_PP_TXSIZE_0, "R200_PP_TXSIZE_0" },
+   { R200_PP_TXFORMAT_X_0, "R200_PP_TXFORMAT_X_0" },
+   { R200_PP_TXPITCH_0, "R200_PP_TXPITCH_0" },
+   { R200_PP_BORDER_COLOR_0, "R200_PP_BORDER_COLOR_0" },
+   { R200_PP_CUBIC_FACES_0, "R200_PP_CUBIC_FACES_0" },
+   { R200_PP_TXFILTER_1, "R200_PP_TXFILTER_1" },
+   { R200_PP_TXFORMAT_1, "R200_PP_TXFORMAT_1" },
+   { R200_PP_TXSIZE_1, "R200_PP_TXSIZE_1" },
+   { R200_PP_TXFORMAT_X_1, "R200_PP_TXFORMAT_X_1" },
+   { R200_PP_TXPITCH_1, "R200_PP_TXPITCH_1" },
+   { R200_PP_BORDER_COLOR_1, "R200_PP_BORDER_COLOR_1" },
+   { R200_PP_CUBIC_FACES_1, "R200_PP_CUBIC_FACES_1" },
+   { R200_PP_TXFILTER_2, "R200_PP_TXFILTER_2" },
+   { R200_PP_TXFORMAT_2, "R200_PP_TXFORMAT_2" },
+   { R200_PP_TXSIZE_2, "R200_PP_TXSIZE_2" },
+   { R200_PP_TXFORMAT_X_2, "R200_PP_TXFORMAT_X_2" },
+   { R200_PP_TXPITCH_2, "R200_PP_TXPITCH_2" },
+   { R200_PP_BORDER_COLOR_2, "R200_PP_BORDER_COLOR_2" },
+   { R200_PP_CUBIC_FACES_2, "R200_PP_CUBIC_FACES_2" },
+   { R200_PP_TXFILTER_3, "R200_PP_TXFILTER_3" },
+   { R200_PP_TXFORMAT_3, "R200_PP_TXFORMAT_3" },
+   { R200_PP_TXSIZE_3, "R200_PP_TXSIZE_3" },
+   { R200_PP_TXFORMAT_X_3, "R200_PP_TXFORMAT_X_3" },
+   { R200_PP_TXPITCH_3, "R200_PP_TXPITCH_3" },
+   { R200_PP_BORDER_COLOR_3, "R200_PP_BORDER_COLOR_3" },
+   { R200_PP_CUBIC_FACES_3, "R200_PP_CUBIC_FACES_3" },
+   { R200_PP_TXFILTER_4, "R200_PP_TXFILTER_4" },
+   { R200_PP_TXFORMAT_4, "R200_PP_TXFORMAT_4" },
+   { R200_PP_TXSIZE_4, "R200_PP_TXSIZE_4" },
+   { R200_PP_TXFORMAT_X_4, "R200_PP_TXFORMAT_X_4" },
+   { R200_PP_TXPITCH_4, "R200_PP_TXPITCH_4" },
+   { R200_PP_BORDER_COLOR_4, "R200_PP_BORDER_COLOR_4" },
+   { R200_PP_CUBIC_FACES_4, "R200_PP_CUBIC_FACES_4" },
+   { R200_PP_TXFILTER_5, "R200_PP_TXFILTER_5" },
+   { R200_PP_TXFORMAT_5, "R200_PP_TXFORMAT_5" },
+   { R200_PP_TXSIZE_5, "R200_PP_TXSIZE_5" },
+   { R200_PP_TXFORMAT_X_5, "R200_PP_TXFORMAT_X_5" },
+   { R200_PP_TXPITCH_5, "R200_PP_TXPITCH_5" },
+   { R200_PP_BORDER_COLOR_5, "R200_PP_BORDER_COLOR_5" },
+   { R200_PP_CUBIC_FACES_5, "R200_PP_CUBIC_FACES_5" },
+   { R200_PP_TXOFFSET_0, "R200_PP_TXOFFSET_0" },
+   { R200_PP_CUBIC_OFFSET_F1_0, "R200_PP_CUBIC_OFFSET_F1_0" },
+   { R200_PP_CUBIC_OFFSET_F2_0, "R200_PP_CUBIC_OFFSET_F2_0" },
+   { R200_PP_CUBIC_OFFSET_F3_0, "R200_PP_CUBIC_OFFSET_F3_0" },
+   { R200_PP_CUBIC_OFFSET_F4_0, "R200_PP_CUBIC_OFFSET_F4_0" },
+   { R200_PP_CUBIC_OFFSET_F5_0, "R200_PP_CUBIC_OFFSET_F5_0" },
+   { R200_PP_TXOFFSET_1, "R200_PP_TXOFFSET_1" },
+   { R200_PP_CUBIC_OFFSET_F1_1, "R200_PP_CUBIC_OFFSET_F1_1" },
+   { R200_PP_CUBIC_OFFSET_F2_1, "R200_PP_CUBIC_OFFSET_F2_1" },
+   { R200_PP_CUBIC_OFFSET_F3_1, "R200_PP_CUBIC_OFFSET_F3_1" },
+   { R200_PP_CUBIC_OFFSET_F4_1, "R200_PP_CUBIC_OFFSET_F4_1" },
+   { R200_PP_CUBIC_OFFSET_F5_1, "R200_PP_CUBIC_OFFSET_F5_1" },
+   { R200_PP_TXOFFSET_2, "R200_PP_TXOFFSET_2" },
+   { R200_PP_CUBIC_OFFSET_F1_2, "R200_PP_CUBIC_OFFSET_F1_2" },
+   { R200_PP_CUBIC_OFFSET_F2_2, "R200_PP_CUBIC_OFFSET_F2_2" },
+   { R200_PP_CUBIC_OFFSET_F3_2, "R200_PP_CUBIC_OFFSET_F3_2" },
+   { R200_PP_CUBIC_OFFSET_F4_2, "R200_PP_CUBIC_OFFSET_F4_2" },
+   { R200_PP_CUBIC_OFFSET_F5_2, "R200_PP_CUBIC_OFFSET_F5_2" },
+   { R200_PP_TXOFFSET_3, "R200_PP_TXOFFSET_3" },
+   { R200_PP_CUBIC_OFFSET_F1_3, "R200_PP_CUBIC_OFFSET_F1_3" },
+   { R200_PP_CUBIC_OFFSET_F2_3, "R200_PP_CUBIC_OFFSET_F2_3" },
+   { R200_PP_CUBIC_OFFSET_F3_3, "R200_PP_CUBIC_OFFSET_F3_3" },
+   { R200_PP_CUBIC_OFFSET_F4_3, "R200_PP_CUBIC_OFFSET_F4_3" },
+   { R200_PP_CUBIC_OFFSET_F5_3, "R200_PP_CUBIC_OFFSET_F5_3" },
+   { R200_PP_TXOFFSET_4, "R200_PP_TXOFFSET_4" },
+   { R200_PP_CUBIC_OFFSET_F1_4, "R200_PP_CUBIC_OFFSET_F1_4" },
+   { R200_PP_CUBIC_OFFSET_F2_4, "R200_PP_CUBIC_OFFSET_F2_4" },
+   { R200_PP_CUBIC_OFFSET_F3_4, "R200_PP_CUBIC_OFFSET_F3_4" },
+   { R200_PP_CUBIC_OFFSET_F4_4, "R200_PP_CUBIC_OFFSET_F4_4" },
+   { R200_PP_CUBIC_OFFSET_F5_4, "R200_PP_CUBIC_OFFSET_F5_4" },
+   { R200_PP_TXOFFSET_5, "R200_PP_TXOFFSET_5" },
+   { R200_PP_CUBIC_OFFSET_F1_5, "R200_PP_CUBIC_OFFSET_F1_5" },
+   { R200_PP_CUBIC_OFFSET_F2_5, "R200_PP_CUBIC_OFFSET_F2_5" },
+   { R200_PP_CUBIC_OFFSET_F3_5, "R200_PP_CUBIC_OFFSET_F3_5" },
+   { R200_PP_CUBIC_OFFSET_F4_5, "R200_PP_CUBIC_OFFSET_F4_5" },
+   { R200_PP_CUBIC_OFFSET_F5_5, "R200_PP_CUBIC_OFFSET_F5_5" },
+   { R200_PP_TAM_DEBUG3, "R200_PP_TAM_DEBUG3" },
+   { R200_PP_TFACTOR_0, "R200_PP_TFACTOR_0" },
+   { R200_PP_TFACTOR_1, "R200_PP_TFACTOR_1" },
+   { R200_PP_TFACTOR_2, "R200_PP_TFACTOR_2" },
+   { R200_PP_TFACTOR_3, "R200_PP_TFACTOR_3" },
+   { R200_PP_TFACTOR_4, "R200_PP_TFACTOR_4" },
+   { R200_PP_TFACTOR_5, "R200_PP_TFACTOR_5" },
+   { R200_PP_TXCBLEND_0, "R200_PP_TXCBLEND_0" },
+   { R200_PP_TXCBLEND2_0, "R200_PP_TXCBLEND2_0" },
+   { R200_PP_TXABLEND_0, "R200_PP_TXABLEND_0" },
+   { R200_PP_TXABLEND2_0, "R200_PP_TXABLEND2_0" },
+   { R200_PP_TXCBLEND_1, "R200_PP_TXCBLEND_1" },
+   { R200_PP_TXCBLEND2_1, "R200_PP_TXCBLEND2_1" },
+   { R200_PP_TXABLEND_1, "R200_PP_TXABLEND_1" },
+   { R200_PP_TXABLEND2_1, "R200_PP_TXABLEND2_1" },
+   { R200_PP_TXCBLEND_2, "R200_PP_TXCBLEND_2" },
+   { R200_PP_TXCBLEND2_2, "R200_PP_TXCBLEND2_2" },
+   { R200_PP_TXABLEND_2, "R200_PP_TXABLEND_2" },
+   { R200_PP_TXABLEND2_2, "R200_PP_TXABLEND2_2" },
+   { R200_PP_TXCBLEND_3, "R200_PP_TXCBLEND_3" },
+   { R200_PP_TXCBLEND2_3, "R200_PP_TXCBLEND2_3" },
+   { R200_PP_TXABLEND_3, "R200_PP_TXABLEND_3" },
+   { R200_PP_TXABLEND2_3, "R200_PP_TXABLEND2_3" },
+   { R200_PP_TXCBLEND_4, "R200_PP_TXCBLEND_4" },
+   { R200_PP_TXCBLEND2_4, "R200_PP_TXCBLEND2_4" },
+   { R200_PP_TXABLEND_4, "R200_PP_TXABLEND_4" },
+   { R200_PP_TXABLEND2_4, "R200_PP_TXABLEND2_4" },
+   { R200_PP_TXCBLEND_5, "R200_PP_TXCBLEND_5" },
+   { R200_PP_TXCBLEND2_5, "R200_PP_TXCBLEND2_5" },
+   { R200_PP_TXABLEND_5, "R200_PP_TXABLEND_5" },
+   { R200_PP_TXABLEND2_5, "R200_PP_TXABLEND2_5" },
+   { R200_PP_TXCBLEND_6, "R200_PP_TXCBLEND_6" },
+   { R200_PP_TXCBLEND2_6, "R200_PP_TXCBLEND2_6" },
+   { R200_PP_TXABLEND_6, "R200_PP_TXABLEND_6" },
+   { R200_PP_TXABLEND2_6, "R200_PP_TXABLEND2_6" },
+   { R200_PP_TXCBLEND_7, "R200_PP_TXCBLEND_7" },
+   { R200_PP_TXCBLEND2_7, "R200_PP_TXCBLEND2_7" },
+   { R200_PP_TXABLEND_7, "R200_PP_TXABLEND_7" },
+   { R200_PP_TXABLEND2_7, "R200_PP_TXABLEND2_7" },
+   { R200_RB3D_ABLENDCNTL, "R200_RB3D_ABLENDCNTL" },
+   { R200_RB3D_CBLENDCNTL, "R200_RB3D_CBLENDCNTL" },
+   { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
+   { R200_PP_CNTL_X, "R200_PP_CNTL_X" },
+   { R200_SE_VAP_CNTL_STATUS, "R200_SE_VAP_CNTL_STATUS" }, 
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1" }, 
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2" }, 
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3" }, 
+};
+
+/* Symbolic names for TCL scalar state slots, keyed by scalar index.
+ * init_regs() walks this table front to back while filling scalars[], giving
+ * each slot a pointer to the most recently passed entry, so the entries are
+ * expected to be in ascending index order (TODO confirm against r200_reg.h).
+ * The trailing { 1000, "" } entry is a terminator: its index is larger than
+ * any scalars[] slot, so the walk never steps onto or past it.
+ */
+static struct reg_names scalar_names[] = {
+   { R200_SS_LIGHT_DCD_ADDR, "R200_SS_LIGHT_DCD_ADDR" },
+   { R200_SS_LIGHT_DCM_ADDR, "R200_SS_LIGHT_DCM_ADDR" },
+   { R200_SS_LIGHT_SPOT_EXPONENT_ADDR, "R200_SS_LIGHT_SPOT_EXPONENT_ADDR" },
+   { R200_SS_LIGHT_SPOT_CUTOFF_ADDR, "R200_SS_LIGHT_SPOT_CUTOFF_ADDR" },
+   { R200_SS_LIGHT_SPECULAR_THRESH_ADDR, "R200_SS_LIGHT_SPECULAR_THRESH_ADDR" },
+   { R200_SS_LIGHT_RANGE_CUTOFF_SQRD, "R200_SS_LIGHT_RANGE_CUTOFF_SQRD" },
+   { R200_SS_LIGHT_RANGE_ATT_CONST, "R200_SS_LIGHT_RANGE_ATT_CONST" },
+   { R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, "R200_SS_VERT_GUARD_CLIP_ADJ_ADDR" },
+   { R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR" },
+   { R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR" },
+   { R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR" },
+   { R200_SS_MAT_0_SHININESS, "R200_SS_MAT_0_SHININESS" },
+   { R200_SS_MAT_1_SHININESS, "R200_SS_MAT_1_SHININESS" },
+   { 1000, "" },
+};
+
+/* Symbolic names for TCL vector state, keyed by vector index.
+ *
+ * Each vector is puffed out into four consecutive entries of vectors[] so
+ * that every component looks like a normal (dword) register: init_regs()
+ * compares idx*4 against the vectors[] slot number, and
+ * radeon_emit_vectors() looks components up as start*4+j.
+ *
+ * { 0, "start" } gives slots that precede the first named vector something
+ * to point at; { 1000, "" } is a terminator whose idx*4 lies beyond the end
+ * of vectors[], so the walk in init_regs() never advances onto it.
+ */
+static struct reg_names vector_names[] = {
+   { 0, "start" },
+   { R200_VS_LIGHT_AMBIENT_ADDR, "R200_VS_LIGHT_AMBIENT_ADDR" },
+   { R200_VS_LIGHT_DIFFUSE_ADDR, "R200_VS_LIGHT_DIFFUSE_ADDR" },
+   { R200_VS_LIGHT_SPECULAR_ADDR, "R200_VS_LIGHT_SPECULAR_ADDR" },
+   { R200_VS_LIGHT_DIRPOS_ADDR, "R200_VS_LIGHT_DIRPOS_ADDR" },
+   { R200_VS_LIGHT_HWVSPOT_ADDR, "R200_VS_LIGHT_HWVSPOT_ADDR" },
+   { R200_VS_LIGHT_ATTENUATION_ADDR, "R200_VS_LIGHT_ATTENUATION_ADDR" },
+   { R200_VS_SPOT_DUAL_CONE, "R200_VS_SPOT_DUAL_CONE" },
+   { R200_VS_GLOBAL_AMBIENT_ADDR, "R200_VS_GLOBAL_AMBIENT_ADDR" },
+   { R200_VS_FOG_PARAM_ADDR, "R200_VS_FOG_PARAM_ADDR" },
+   { R200_VS_EYE_VECTOR_ADDR, "R200_VS_EYE_VECTOR_ADDR" },
+   { R200_VS_UCP_ADDR, "R200_VS_UCP_ADDR" },
+   { R200_VS_PNT_SPRITE_VPORT_SCALE, "R200_VS_PNT_SPRITE_VPORT_SCALE" },
+   { R200_VS_MATRIX_0_MV, "R200_VS_MATRIX_0_MV" },
+   { R200_VS_MATRIX_1_INV_MV, "R200_VS_MATRIX_1_INV_MV" },
+   { R200_VS_MATRIX_2_MVP, "R200_VS_MATRIX_2_MVP" },
+   { R200_VS_MATRIX_3_TEX0, "R200_VS_MATRIX_3_TEX0" },
+   { R200_VS_MATRIX_4_TEX1, "R200_VS_MATRIX_4_TEX1" },
+   { R200_VS_MATRIX_5_TEX2, "R200_VS_MATRIX_5_TEX2" },
+   { R200_VS_MATRIX_6_TEX3, "R200_VS_MATRIX_6_TEX3" },
+   { R200_VS_MATRIX_7_TEX4, "R200_VS_MATRIX_7_TEX4" },
+   { R200_VS_MATRIX_8_TEX5, "R200_VS_MATRIX_8_TEX5" },
+   { R200_VS_MAT_0_EMISS, "R200_VS_MAT_0_EMISS" },
+   { R200_VS_MAT_0_AMB, "R200_VS_MAT_0_AMB" },
+   { R200_VS_MAT_0_DIF, "R200_VS_MAT_0_DIF" },
+   { R200_VS_MAT_0_SPEC, "R200_VS_MAT_0_SPEC" },
+   { R200_VS_MAT_1_EMISS, "R200_VS_MAT_1_EMISS" },
+   { R200_VS_MAT_1_AMB, "R200_VS_MAT_1_AMB" },
+   { R200_VS_MAT_1_DIF, "R200_VS_MAT_1_DIF" },
+   { R200_VS_MAT_1_SPEC, "R200_VS_MAT_1_SPEC" },
+   { R200_VS_EYE2CLIP_MTX, "R200_VS_EYE2CLIP_MTX" },
+   { R200_VS_PNT_SPRITE_ATT_CONST, "R200_VS_PNT_SPRITE_ATT_CONST" },
+   { R200_VS_PNT_SPRITE_EYE_IN_MODEL, "R200_VS_PNT_SPRITE_EYE_IN_MODEL" },
+   { R200_VS_PNT_SPRITE_CLAMP, "R200_VS_PNT_SPRITE_CLAMP" },
+   { R200_VS_MAX, "R200_VS_MAX" },
+   { 1000, "" },
+};
+
+/* View the same storage either as a float or as an int. */
+union fi { float f; int i; };
+
+/* Bits for struct reg::flags. */
+#define ISVEC   1   /* slot is one component of a 4-component vector */
+#define ISFLOAT 2   /* value is printed and compared as a float */
+#define TOUCHED 4   /* slot has been written at least once (set by print_reg_assignment) */
+
+/* Tracking record for one register / scalar slot / vector component. */
+struct reg {
+   int idx;                    /* register offset (regs[]) or slot number (scalars[], vectors[]); -1 terminates a table */
+   struct reg_names *closest;  /* name entry used by get_reg_name() as the base for this slot */
+   int flags;                  /* combination of ISVEC / ISFLOAT / TOUCHED */
+   union fi current;           /* value most recently assigned */
+   union fi *values;           /* distinct integer values seen so far (grown by find_or_add_value) */
+   int nvalues;                /* number of entries used in values[] */
+   int nalloc;                 /* number of entries allocated for values[] */
+   float vmin, vmax;           /* smallest / largest float value seen so far */
+};
+
+
+/* One extra slot in each table holds the idx == -1 terminator that
+ * lookup_reg() stops on.
+ */
+static struct reg regs[Elements(reg_names)+1];
+static struct reg scalars[512+1];
+static struct reg vectors[512*4+1];
+
+/* Running statistics: dwords emitted, dwords that changed a value, and
+ * DMA buffers discarded.
+ */
+static int total, total_changed, bufs;
+
+/* Populate the regs[], scalars[] and vectors[] tracking tables.
+ *
+ * regs[] gets one entry per reg_names[] entry.  scalars[] and vectors[] get
+ * one entry per slot, each pointing at the closest preceding entry of
+ * scalar_names[] / vector_names[] (vector indices are scaled by 4 because
+ * every vector occupies four consecutive slots).  The last element of each
+ * table is marked with idx == -1 so lookup_reg() knows where to stop.
+ */
+static void init_regs( void )
+{
+   struct reg_names *tmp;
+   int i;
+
+   /* regs[] is one element longer than reg_names[] (room for the
+    * terminator), so iterate over reg_names[] only; indexing it with
+    * Elements(regs) would read one entry past its end.
+    */
+   for (i = 0 ; i < Elements(reg_names) ; i++) {
+      regs[i].idx = reg_names[i].idx;
+      regs[i].closest = &reg_names[i];
+      regs[i].flags = 0;
+   }
+
+   /* tmp trails behind i: it advances whenever the next name's index is
+    * reached, so it always names the nearest slot at or below i.
+    */
+   for (i = 0, tmp = scalar_names ; i < Elements(scalars) ; i++) {
+      if (tmp[1].idx == i) tmp++;
+      scalars[i].idx = i;
+      scalars[i].closest = tmp;
+      scalars[i].flags = ISFLOAT;
+   }
+
+   for (i = 0, tmp = vector_names ; i < Elements(vectors) ; i++) {
+      if (tmp[1].idx*4 == i) tmp++;
+      vectors[i].idx = i;
+      vectors[i].closest = tmp;
+      vectors[i].flags = ISFLOAT|ISVEC;
+   }
+
+   /* Terminators for lookup_reg(). */
+   regs[Elements(regs)-1].idx = -1;
+   scalars[Elements(scalars)-1].idx = -1;
+   vectors[Elements(vectors)-1].idx = -1;
+}
+
+/* Record that integer value 'val' was written to 'reg'.
+ *
+ * Returns 1 if the value had already been seen for this register, 0 if it
+ * is new.  New values are appended to reg->values, which is grown on demand.
+ * If the list cannot be grown the value is reported as new but not recorded.
+ */
+static int find_or_add_value( struct reg *reg, int val )
+{
+   int j;
+
+   for ( j = 0 ; j < reg->nvalues ; j++)
+      if ( val == reg->values[j].i )
+         return 1;
+
+   /* j == reg->nvalues here: grow the list when it is full. */
+   if (j == reg->nalloc) {
+      int nalloc = (reg->nalloc + 5) * 2;
+      union fi *values = (union fi *) realloc( reg->values,
+                                               nalloc * sizeof(union fi) );
+
+      /* On failure realloc leaves the old block intact; keep it rather
+       * than overwriting the pointer with NULL and writing through it.
+       */
+      if (!values) {
+         fprintf(stderr, "*** out of memory tracking register values\n");
+         return 0;
+      }
+
+      reg->values = values;
+      reg->nalloc = nalloc;
+   }
+
+   reg->values[reg->nvalues++].i = val;
+   return 0;
+}
+
+/* Linear search of a tracking table for the entry whose idx equals 'reg'.
+ * The table must end with an entry whose idx is -1.  Returns the matching
+ * entry, or 0 (after logging the offending index) when there is none.
+ */
+static struct reg *lookup_reg( struct reg *tab, int reg )
+{
+   struct reg *entry = tab;
+
+   while (entry->idx != -1) {
+      if (entry->idx == reg)
+         return entry;
+      entry++;
+   }
+
+   fprintf(stderr, "*** unknown reg 0x%x\n", reg);
+   return 0;
+}
+
+
+/* Build a printable name for 'reg'.
+ *
+ * When the slot index equals the index of its closest name entry the entry's
+ * name is returned directly.  Otherwise the name is composed in a static
+ * buffer (overwritten by the next call):
+ *    vector component:  NAME[c]  or  NAME+n[c]
+ *    anything else:     NAME+n
+ */
+static const char *get_reg_name( struct reg *reg )
+{
+   static char tmp[80];
+   struct reg_names *base = reg->closest;
+
+   if (reg->idx == base->idx)
+      return base->name;
+
+   if (reg->flags & ISVEC) {
+      int vec = reg->idx / 4;    /* which vector */
+      int comp = reg->idx % 4;   /* which component of it */
+
+      if (vec == base->idx)
+         sprintf(tmp, "%s[%d]", base->name, comp);
+      else
+         sprintf(tmp, "%s+%d[%d]", base->name, vec - base->idx, comp);
+   }
+   else {
+      /* reg->idx differs from base->idx here (equal case returned above). */
+      sprintf(tmp, "%s+%d", base->name, reg->idx - base->idx);
+   }
+
+   return tmp;
+}
+
+/* Log the assignment of integer 'data' to 'reg' and store it as the
+ * register's current value.  The line is printed in VERBOSE mode always, and
+ * in NORMAL mode only when the value changed or was never seen before.
+ * Returns non-zero if the value differs from the previous one.
+ */
+static int print_int_reg_assignment( struct reg *reg, int data )
+{
+   const int changed = (reg->current.i != data);
+   const int seen_before = find_or_add_value( reg, data );
+   const int show = VERBOSE || (NORMAL && (changed || !seen_before));
+
+   if (show)
+      fprintf(stderr, "   %s <-- 0x%x", get_reg_name(reg), data);
+
+   if (NORMAL) {
+      if (!seen_before)
+         fprintf(stderr, " *** BRAND NEW VALUE");
+      else if (changed)
+         fprintf(stderr, " *** CHANGED");
+   }
+
+   reg->current.i = data;
+
+   if (show)
+      fprintf(stderr, "\n");
+
+   return changed;
+}
+
+
+/* Log the assignment of float 'data' to 'reg' and store it as the register's
+ * current value.  In NORMAL mode the running minimum / maximum is updated
+ * and a new extreme or a plain change is flagged.  Returns non-zero if the
+ * value differs from the previous one.
+ */
+static int print_float_reg_assignment( struct reg *reg, float data )
+{
+   const int changed = (reg->current.f != data);
+   const int below = (data < reg->vmin);
+   const int above = (data > reg->vmax);
+   const int show = VERBOSE || (NORMAL && (below || above || changed));
+
+   if (show)
+      fprintf(stderr, "   %s <-- %.3f", get_reg_name(reg), data);
+
+   if (NORMAL) {
+      if (below) {
+         fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin);
+         reg->vmin = data;
+      }
+      else if (above) {
+         fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax);
+         reg->vmax = data;
+      }
+      else if (changed) {
+         fprintf(stderr, " *** CHANGED");
+      }
+   }
+
+   reg->current.f = data;
+
+   if (show)
+      fprintf(stderr, "\n");
+
+   return changed;
+}
+
+/* Record and log the write of raw dword 'data' to 'reg', dispatching on
+ * whether the register holds a float or an integer.
+ *
+ * 'reg' may be 0: lookup_reg() returns 0 (after logging) for registers it
+ * does not know, and callers pass its result straight through.  Such writes
+ * are ignored and reported as "not changed".
+ *
+ * Returns non-zero if the register's value changed.
+ */
+static int print_reg_assignment( struct reg *reg, int data )
+{
+   union fi bits;
+
+   if (!reg)
+      return 0;
+
+   reg->flags |= TOUCHED;
+
+   if (reg->flags & ISFLOAT) {
+      /* Reinterpret the dword through the union rather than a pointer
+       * cast, which would break the compiler's aliasing rules.
+       */
+      bits.i = data;
+      return print_float_reg_assignment( reg, bits.f );
+   }
+
+   return print_int_reg_assignment( reg, data );
+}
+
+/* Print the current value of 'reg' if it has ever been written. */
+static void print_reg( struct reg *reg )
+{
+   if (!(reg->flags & TOUCHED))
+      return;
+
+   if (reg->flags & ISFLOAT)
+      fprintf(stderr, "   %s == %f\n", get_reg_name(reg), reg->current.f);
+   else
+      fprintf(stderr, "   %s == 0x%x\n", get_reg_name(reg), reg->current.i);
+}
+
+
+/* Print every register, scalar and vector component written so far. */
+static void dump_state( void )
+{
+   struct reg *r;
+
+   for (r = regs ; r < regs + Elements(regs) ; r++)
+      print_reg( r );
+
+   for (r = scalars ; r < scalars + Elements(scalars) ; r++)
+      print_reg( r );
+
+   for (r = vectors ; r < vectors + Elements(vectors) ; r++)
+      print_reg( r );
+}
+
+
+
+/* Decode one state packet: packet[id].len consecutive dword registers
+ * starting at packet[id].start, with values taken from the front of cmdbuf.
+ *
+ * Advances cmdbuf past the payload.  Returns 0 on success or -EINVAL when
+ * the payload does not fit in the buffer or the packet id has no name.
+ */
+static int radeon_emit_packets( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   int id = (int)header.packet.packet_id;
+   int sz = packet[id].len;
+   int *data = (int *)cmdbuf->buf;
+   int i;
+
+   if (sz * sizeof(int) > cmdbuf->bufsz) {
+      fprintf(stderr, "Packet overflows cmdbuf\n");
+      return -EINVAL;
+   }
+
+   if (!packet[id].name) {
+      fprintf(stderr, "*** Unknown packet 0 nr %d\n", id );
+      return -EINVAL;
+   }
+
+   if (VERBOSE)
+      fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz );
+
+   for ( i = 0 ; i < sz ; i++) {
+      struct reg *reg = lookup_reg( regs, packet[id].start + i*4 );
+
+      /* lookup_reg() returns 0 (and logs) for offsets missing from the
+       * table; skip those instead of dereferencing a null pointer.
+       */
+      if (reg && print_reg_assignment( reg, data[i] ))
+         total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+
+/* Decode a scalar upload: header.scalars.count dwords written to scalar
+ * slots offset, offset+stride, offset+2*stride, ...
+ *
+ * Advances cmdbuf past the payload.  Returns 0 on success or -EINVAL when
+ * the payload is larger than what is left in the buffer.
+ */
+static int radeon_emit_scalars( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   int sz = header.scalars.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.scalars.offset;
+   int stride = header.scalars.stride;
+   int i;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n",
+              start, stride, sz, start + stride * sz);
+
+   /* Same guard as radeon_emit_packets(): never read past the data that
+    * was actually submitted.  Signed compare so a negative remainder also
+    * fails.
+    */
+   if (sz * (int)sizeof(int) > cmdbuf->bufsz) {
+      fprintf(stderr, "Scalars overflow cmdbuf\n");
+      return -EINVAL;
+   }
+
+   for (i = 0 ; i < sz ; i++, start += stride) {
+      struct reg *reg = lookup_reg( scalars, start );
+
+      /* Unknown slots come back as 0 from lookup_reg(); skip them. */
+      if (reg && print_reg_assignment( reg, data[i] ))
+         total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+
+/* Decode a scalar upload addressed relative to slot 0x100; otherwise
+ * identical to radeon_emit_scalars().
+ *
+ * Advances cmdbuf past the payload.  Returns 0 on success, -1 when the slot
+ * range check fails, or -EINVAL when the payload is larger than what is left
+ * in the buffer.
+ */
+static int radeon_emit_scalars2( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   int sz = header.scalars.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.scalars.offset + 0x100;
+   int stride = header.scalars.stride;
+   int i;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n",
+              start, stride, sz, start + stride * sz);
+
+   /* NOTE(review): start is already >= 0x100, so this limit only admits a
+    * single dword at slot 256 -- confirm 257 is the intended bound.
+    */
+   if (start + stride * sz > 257) {
+      fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", start, stride, sz);
+      return -1;
+   }
+
+   /* Same guard as radeon_emit_packets(): never read past the data that
+    * was actually submitted.  Signed compare so a negative remainder also
+    * fails.
+    */
+   if (sz * (int)sizeof(int) > cmdbuf->bufsz) {
+      fprintf(stderr, "Scalars2 overflow cmdbuf\n");
+      return -EINVAL;
+   }
+
+   for (i = 0 ; i < sz ; i++, start += stride) {
+      struct reg *reg = lookup_reg( scalars, start );
+
+      /* Unknown slots come back as 0 from lookup_reg(); skip them. */
+      if (reg && print_reg_assignment( reg, data[i] ))
+         total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+/* Decode a vector upload: header.vectors.count dwords, consumed four at a
+ * time, each group written to the components of vector 'start', then
+ * 'start + stride', and so on.
+ *
+ * Advances cmdbuf past the payload.  Returns 0 on success or -EINVAL when
+ * the payload is larger than what is left in the buffer.
+ *
+ * Check: inf/nan/extreme-size?
+ * Check: table start, end, nr, etc.
+ */
+static int radeon_emit_vectors( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   int sz = header.vectors.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.vectors.offset;
+   int stride = header.vectors.stride;
+   int i,j;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n",
+              start, stride, sz, start + stride * sz, header.i);
+
+/*    if (start + stride * (sz/4) > 128) { */
+/*       fprintf(stderr, "emit vectors OVERFLOW %d/%d/%d\n", start, stride, sz); */
+/*       return -1; */
+/*    } */
+
+   /* Same guard as radeon_emit_packets(): never read past the data that
+    * was actually submitted.  Signed compare so a negative remainder also
+    * fails.
+    */
+   if (sz * (int)sizeof(int) > cmdbuf->bufsz) {
+      fprintf(stderr, "Vectors overflow cmdbuf\n");
+      return -EINVAL;
+   }
+
+   for (i = 0 ; i < sz ;  start += stride) {
+      int changed = 0;
+
+      /* Stop at sz as well as at 4 components, so a count that is not a
+       * multiple of four cannot run past the payload.
+       */
+      for (j = 0 ; j < 4 && i < sz ; i++,j++) {
+         struct reg *reg = lookup_reg( vectors, start*4+j );
+
+         /* Unknown slots come back as 0 from lookup_reg(); skip them. */
+         if (reg && print_reg_assignment( reg, data[i] ))
+            changed = 1;
+      }
+      if (changed)
+         total_changed += 4;
+      total += 4;
+   }
+
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+/* Disabled: prints the components selected by a vertex-format word and
+ * flags values not seen before.  It refers to others[V_VTXFMT], so it
+ * cannot simply be switched back on unless that table exists elsewhere
+ * in the file.
+ */
+#if 0
+static int print_vertex_format( int vfmt )
+{
+   if (NORMAL) {
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+             "vertex format",
+             vfmt,
+             "xy,",
+             (vfmt & R200_VTX_Z0) ? "z," : "",
+             (vfmt & R200_VTX_W0) ? "w0," : "",
+             (vfmt & R200_VTX_FPCOLOR) ? "fpcolor," : "",
+             (vfmt & R200_VTX_FPALPHA) ? "fpalpha," : "",
+             (vfmt & R200_VTX_PKCOLOR) ? "pkcolor," : "",
+             (vfmt & R200_VTX_FPSPEC) ? "fpspec," : "",
+             (vfmt & R200_VTX_FPFOG) ? "fpfog," : "",
+             (vfmt & R200_VTX_PKSPEC) ? "pkspec," : "",
+             (vfmt & R200_VTX_ST0) ? "st0," : "",
+             (vfmt & R200_VTX_ST1) ? "st1," : "",
+             (vfmt & R200_VTX_Q1) ? "q1," : "",
+             (vfmt & R200_VTX_ST2) ? "st2," : "",
+             (vfmt & R200_VTX_Q2) ? "q2," : "",
+             (vfmt & R200_VTX_ST3) ? "st3," : "",
+             (vfmt & R200_VTX_Q3) ? "q3," : "",
+             (vfmt & R200_VTX_Q0) ? "q0," : "",
+             (vfmt & R200_VTX_N0) ? "n0," : "",
+             (vfmt & R200_VTX_XY1) ? "xy1," : "",
+             (vfmt & R200_VTX_Z1) ? "z1," : "",
+             (vfmt & R200_VTX_W1) ? "w1," : "",
+             (vfmt & R200_VTX_N1) ? "n1," : "");
+
+   
+      if (!find_or_add_value( &others[V_VTXFMT], vfmt ))
+        fprintf(stderr, " *** NEW VALUE");
+
+      fprintf(stderr, "\n");
+   }
+
+   return 0;
+}
+#endif
+
+/* Printable names for the primitive type held in the low four bits of a
+ * primitive word (see print_prim_and_flags).  Entry 8 has no name and is a
+ * null pointer.
+ */
+static char *primname[0x10] = {
+   "NONE",
+   "POINTS",
+   "LINES",
+   "LINE_STRIP",
+   "TRIANGLES",
+   "TRIANGLE_FAN",
+   "TRIANGLE_STRIP",
+   "RECT_LIST",
+   0,
+   "3VRT_POINTS",
+   "3VRT_LINES",
+   "POINT_SPRITES",
+   "LINE_LOOP",
+   "QUADS",
+   "QUAD_STRIP",
+   "POLYGON",
+};
+
+/* Decode and validate a primitive word.
+ *
+ * The low four bits select the primitive type, bits 4-5 the vertex walk
+ * mode, and the upper sixteen bits hold the vertex count.  In NORMAL mode
+ * the flags, type name and count are printed.  The count is then checked
+ * against what the primitive type requires.
+ *
+ * Returns 0 when the word is acceptable, -1 otherwise.
+ */
+static int print_prim_and_flags( int prim )
+{
+   const char *name = primname[prim & 0xf];
+   int numverts;
+
+   if (NORMAL)
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s\n",
+              "prim flags",
+              prim,
+              ((prim & 0x30) == R200_VF_PRIM_WALK_IND) ? "IND," : "",
+              ((prim & 0x30) == R200_VF_PRIM_WALK_LIST) ? "LIST," : "",
+              ((prim & 0x30) == R200_VF_PRIM_WALK_RING) ? "RING," : "",
+              (prim & R200_VF_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ",
+              (prim & R200_VF_INDEX_SZ_4) ? "INDX-32," : "",
+              (prim & R200_VF_TCL_OUTPUT_VTX_ENABLE) ? "TCL_OUT_VTX," : "");
+
+   /* Shift as unsigned: with bit 31 set, a signed shift would sign-extend
+    * and turn a large vertex count into a negative number.
+    */
+   numverts = (int)(((unsigned int)prim) >> 16);
+
+   /* primname[] has a hole (entry 8); never hand a null pointer to %s. */
+   if (NORMAL)
+      fprintf(stderr, "   prim: %s numverts %d\n",
+              name ? name : "UNKNOWN", numverts);
+
+   switch (prim & 0xf) {
+   case R200_VF_PRIM_NONE:
+   case R200_VF_PRIM_POINTS:
+      if (numverts < 1) {
+         fprintf(stderr, "Bad nr verts for point %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_LINES:
+   case R200_VF_PRIM_POINT_SPRITES:
+      if ((numverts & 1) || numverts == 0) {
+         fprintf(stderr, "Bad nr verts for line %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_LINE_STRIP:
+   case R200_VF_PRIM_LINE_LOOP:
+      if (numverts < 2) {
+         fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_TRIANGLES:
+   case R200_VF_PRIM_3VRT_POINTS:
+   case R200_VF_PRIM_3VRT_LINES:
+   case R200_VF_PRIM_RECT_LIST:
+      if (numverts % 3 || numverts == 0) {
+         fprintf(stderr, "Bad nr verts for tri %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_TRIANGLE_FAN:
+   case R200_VF_PRIM_TRIANGLE_STRIP:
+   case R200_VF_PRIM_POLYGON:
+      if (numverts < 3) {
+         fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_QUADS:
+      if (numverts % 4 || numverts == 0) {
+         fprintf(stderr, "Bad nr verts for quad %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_QUAD_STRIP:
+      if (numverts % 2 || numverts < 4) {
+         fprintf(stderr, "Bad nr verts for quadstrip %d\n", numverts);
+         return -1;
+      }
+      break;
+   default:
+      fprintf(stderr, "Bad primitive\n");
+      return -1;
+   }
+   return 0;
+}
+
+/* Decode one type-3 CP packet at the front of cmdbuf, using built-in
+ * knowledge about each packet type.
+ *
+ * The packet length is taken from the count field of the first dword.  The
+ * packet is rejected with -EINVAL if it is not a PACKET3, does not fit in
+ * the remaining buffer, exceeds RADEON_CP_PACKET_MAX_DWORDS, or fails the
+ * per-opcode checks below.  On success the packet is logged (in NORMAL
+ * mode), cmdbuf is advanced past it, and 0 is returned.
+ */
+static int radeon_emit_packet3( drmRadeonCmdBuffer *cmdbuf )
+{
+   int cmdsz;
+   int *cmd = (int *)cmdbuf->buf;
+   int *tmp;
+   int i, stride, size, start;
+
+   /* Total dwords in the packet: the count field plus two. */
+   cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+
+   if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 ||
+       cmdsz * 4 > cmdbuf->bufsz ||
+       cmdsz > RADEON_CP_PACKET_MAX_DWORDS) {
+      fprintf(stderr, "Bad packet\n");
+      return -EINVAL;
+   }
+
+   /* Dispatch on the header with the count field masked out. */
+   switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) {
+   case R200_CP_CMD_NOP:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_NEXT_CHAR:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_PLY_NEXTSCAN:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_SET_SCISSORS:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_LOAD_MICROCODE:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_WAIT_FOR_IDLE:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz);
+      break;
+
+   case R200_CP_CMD_3D_DRAW_VBUF:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz);
+/*       print_vertex_format(cmd[1]); */
+      /* NOTE(review): cmd[2] is read even when cmdsz is 2 -- confirm
+       * callers always supply at least three dwords for this opcode.
+       */
+      if (print_prim_and_flags(cmd[2]))
+        return -EINVAL;
+      break;
+
+   case R200_CP_CMD_3D_DRAW_IMMD:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_3D_DRAW_INDX: {
+      int neltdwords;
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz);
+/*       print_vertex_format(cmd[1]); */
+      if (print_prim_and_flags(cmd[2]))
+        return -EINVAL;
+      /* Element count from the top half of cmd[2], rounded up to even
+       * and halved (presumably two indices per dword -- TODO confirm).
+       * A mismatch with cmdsz is only reported, not treated as an error.
+       */
+      neltdwords = cmd[2]>>16;
+      neltdwords += neltdwords & 1;
+      neltdwords /= 2;
+      if (neltdwords + 3 != cmdsz)
+        fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
+                neltdwords, cmdsz);
+      break;
+   }
+   case R200_CP_CMD_LOAD_PALETTE:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_3D_LOAD_VBPNTR:
+      if (NORMAL) {
+        fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz);
+        fprintf(stderr, "   nr arrays: %d\n", cmd[1]);
+      }
+
+      /* Arrays are described in pairs: three dwords per full pair, two
+       * dwords for a trailing single array.
+       */
+      if (((cmd[1]/2)*3) + ((cmd[1]%2)*2) != cmdsz - 2) {
+        fprintf(stderr, "  ****** MISMATCH %d/%d *******\n",
+                ((cmd[1]/2)*3) + ((cmd[1]%2)*2) + 2, cmdsz);
+        return -EINVAL;
+      }
+
+      if (NORMAL) {
+        tmp = cmd+2;
+        for (i = 0 ; i < cmd[1] ; i++) {
+           if (i & 1) {
+              /* Second array of a pair: upper half of the shared
+               * dword, address in tmp[2]; then step to the next pair.
+               */
+              stride = (tmp[0]>>24) & 0xff;
+              size = (tmp[0]>>16) & 0xff;
+              start = tmp[2];
+              tmp += 3;
+           }
+           else {
+              /* First array of a pair: lower half of the shared
+               * dword, address in tmp[1].
+               */
+              stride = (tmp[0]>>8) & 0xff;
+              size = (tmp[0]) & 0xff;
+              start = tmp[1];
+           }
+           fprintf(stderr, "   array %d: start 0x%x vsize %d vstride %d\n",
+                   i, start, size, stride );
+        }
+      }
+      break;
+   case R200_CP_CMD_PAINT:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_BITBLT:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_SMALLTEXT:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_HOSTDATA_BLT:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n", 
+             cmdsz);
+      break;
+   case R200_CP_CMD_POLYLINE:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz);
+      break;
+   case R200_CP_CMD_POLYSCANLINES:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n", 
+             cmdsz);
+      break;
+   case R200_CP_CMD_PAINT_MULTI:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n", 
+             cmdsz);
+      break;
+   case R200_CP_CMD_BITBLT_MULTI:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n", 
+             cmdsz);
+      break;
+   case R200_CP_CMD_TRANS_BITBLT:
+      if (NORMAL)
+        fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n", 
+             cmdsz);
+      break;
+   case R200_CP_CMD_3D_DRAW_VBUF_2:
+      if (NORMAL)
+        fprintf(stderr, "R200_CP_CMD_3D_DRAW_VBUF_2, %d dwords\n", 
+             cmdsz);
+      if (print_prim_and_flags(cmd[1]))
+        return -EINVAL;
+      break;
+   case R200_CP_CMD_3D_DRAW_IMMD_2:
+      if (NORMAL)
+        fprintf(stderr, "R200_CP_CMD_3D_DRAW_IMMD_2, %d dwords\n", 
+             cmdsz);
+      if (print_prim_and_flags(cmd[1]))
+        return -EINVAL;
+      break;
+   case R200_CP_CMD_3D_DRAW_INDX_2:
+      if (NORMAL)
+        fprintf(stderr, "R200_CP_CMD_3D_DRAW_INDX_2, %d dwords\n", 
+             cmdsz);
+      if (print_prim_and_flags(cmd[1]))
+        return -EINVAL;
+      break;
+   default:
+      /* Unrecognised opcodes are logged but still skipped over. */
+      fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz);
+      break;
+   }
+      
+   cmdbuf->buf += cmdsz * 4;
+   cmdbuf->bufsz -= cmdsz * 4;
+   return 0;
+}
+
+
+/* Check cliprects for bounds, then pass on to above:
+ */
+/* Handle a PACKET3 that is issued once per clip rectangle: optionally dump
+ * the accumulated state, list the clip rectangles, then decode the packet
+ * with radeon_emit_packet3().  A box count of exactly one is reset to zero
+ * before decoding.
+ */
+static int radeon_emit_packet3_cliprect( drmRadeonCmdBuffer *cmdbuf )
+{
+   XF86DRIClipRectRec *rects = (XF86DRIClipRectRec *)cmdbuf->boxes;
+   int n;
+
+   if (VERBOSE && total_changed) {
+      dump_state();
+      total_changed = 0;
+   }
+
+   if (NORMAL) {
+      for (n = 0 ; n < cmdbuf->nbox ; n++) {
+         XF86DRIClipRectRec *box = &rects[n];
+
+         fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n",
+                 n, cmdbuf->nbox,
+                 box->x1, box->y1, box->x2, box->y2);
+      }
+   }
+
+   if (cmdbuf->nbox == 1)
+      cmdbuf->nbox = 0;
+
+   return radeon_emit_packet3( cmdbuf );
+}
+
+
+/* Walk the command buffer stored in rmesa->store and sanity-check / log
+ * every command in it.
+ *
+ * Each command starts with a drmRadeonCmdHeader whose cmd_type selects the
+ * decoder; the decoder consumes the command's payload from the buffer.  The
+ * register tracking tables are initialised on the first call.
+ *
+ * Returns 0 when the whole buffer was decoded, -EINVAL as soon as a command
+ * is malformed or of unknown type.
+ */
+int r200SanityCmdBuffer( r200ContextPtr rmesa,
+                          int nbox,
+                          XF86DRIClipRectRec *boxes )
+{
+   int idx;
+   drmRadeonCmdBuffer cmdbuf;
+   drmRadeonCmdHeader header;
+   static int inited = 0;
+
+   if (!inited) {
+      init_regs();
+      inited = 1;
+   }
+
+
+   cmdbuf.buf = rmesa->store.cmd_buf;
+   cmdbuf.bufsz = rmesa->store.cmd_used;
+   cmdbuf.boxes = (drmClipRect *)boxes;
+   cmdbuf.nbox = nbox;
+
+   /* Compare as signed: if a decoder consumed more than was left, bufsz is
+    * negative and must end the loop rather than wrap to a huge unsigned
+    * value.
+    */
+   while ( cmdbuf.bufsz >= (int)sizeof(header) ) {
+
+      header.i = *(int *)cmdbuf.buf;
+      cmdbuf.buf += sizeof(header);
+      cmdbuf.bufsz -= sizeof(header);
+
+      switch (header.header.cmd_type) {
+      case RADEON_CMD_PACKET:
+         if (radeon_emit_packets( header, &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_packets failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_SCALARS:
+         if (radeon_emit_scalars( header, &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_scalars failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_SCALARS2:
+         if (radeon_emit_scalars2( header, &cmdbuf )) {
+            /* Name the decoder that actually failed. */
+            fprintf(stderr,"radeon_emit_scalars2 failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_VECTORS:
+         if (radeon_emit_vectors( header, &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_vectors failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_DMA_DISCARD:
+         idx = header.dma.buf_idx;
+         if (NORMAL)
+            fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx);
+         bufs++;
+         break;
+
+      case RADEON_CMD_PACKET3:
+         if (radeon_emit_packet3( &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_packet3 failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_PACKET3_CLIP:
+         if (radeon_emit_packet3_cliprect( &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_packet3_clip failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_WAIT:
+         break;
+
+      default:
+         /* %p requires a void pointer argument. */
+         fprintf(stderr,"bad cmd_type %d at %p\n",
+                 header.header.cmd_type,
+                 (void *)(cmdbuf.buf - sizeof(header)));
+         return -EINVAL;
+      }
+   }
+
+   /* Disabled statistics report, printed every tenth call when enabled. */
+   if (0)
+   {
+      static int n = 0;
+      n++;
+      if (n == 10) {
+         fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
+                 bufs,
+                 total, total_changed,
+                 ((float)total_changed/(float)total*100.0));
+         fprintf(stderr, "Total emitted per buf: %.2f\n",
+                 (float)total/(float)bufs);
+         fprintf(stderr, "Real changes per buf: %.2f\n",
+                 (float)total_changed/(float)bufs);
+
+         bufs = n = total = total_changed = 0;
+      }
+   }
+
+   fprintf(stderr, "leaving %s\n\n\n", __FUNCTION__);
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_sanity.h b/src/mesa/drivers/dri/r200/r200_sanity.h
new file mode 100644 (file)
index 0000000..10260f2
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef R200_SANITY_H
+#define R200_SANITY_H
+
+/* Entry point of the command-buffer sanity checker.
+ *
+ * NOTE(review): this header names r200ContextPtr and XF86DRIClipRectRec
+ * without including anything, so it presumably has to be included after
+ * the headers that declare them (e.g. r200_context.h) -- confirm.
+ *
+ * NOTE(review): presumably returns 0 when the buffer checks out and a
+ * negative errno value such as -EINVAL otherwise -- confirm against
+ * r200_sanity.c.
+ */
+extern int r200SanityCmdBuffer( r200ContextPtr rmesa,
+                               int nbox,
+                               XF86DRIClipRectRec *boxes );
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_screen.c b/src/mesa/drivers/dri/r200/r200_screen.c
new file mode 100644 (file)
index 0000000..ee52d68
--- /dev/null
@@ -0,0 +1,462 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <dlfcn.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+
+#include "r200_screen.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#ifndef _SOLO
+#include "glxextensions.h"
+#endif 
+
+#if 1
+/* Including xf86PciInfo.h introduces a bunch of errors...
+ * so the PCI device IDs needed by this file are defined locally instead.
+ *
+ * NOTE(review): every ID defined here is rejected by r200CreateScreen()
+ * with "Device isn't an r200!", so despite the R200 prefix these appear
+ * to be IDs of chips this driver refuses to drive -- confirm against the
+ * PCI ID table before trusting the macro names.
+ */
+#define PCI_CHIP_R200_QD       0x5144
+#define PCI_CHIP_R200_QE       0x5145
+#define PCI_CHIP_R200_QF       0x5146
+#define PCI_CHIP_R200_QG       0x5147
+#define PCI_CHIP_R200_QY       0x5159
+#define PCI_CHIP_R200_QZ       0x515A
+#define PCI_CHIP_R200_LW       0x4C57 
+#define PCI_CHIP_R200_LY       0x4C59
+#define PCI_CHIP_R200_LZ       0x4C5A
+#define PCI_CHIP_RV200_QW      0x5157 /* Radeon 7500 - not an R200 at all */
+#endif
+
+/* Screen private created by the most recent r200InitDriver() call. */
+static r200ScreenPtr __r200Screen;
+
+/* Forward declaration: getSwapInfo() is referenced by the r200API table
+ * before its definition at the end of this file.
+ */
+static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo );
+
+/* Create the device specific screen private data struct.
+ *
+ * Steps, in order:
+ *   1. check the DRI/DDX/DRM versions via driCheckDriDdxDrmVersions();
+ *   2. allocate a zeroed r200ScreenRec;
+ *   3. reject the device IDs listed in the switch below;
+ *   4. query the DRM (RADEON_PARAM_AGP_BUFFER_OFFSET always; AGP base and
+ *      IRQ number only when drmMinor >= 6);
+ *   5. map the MMIO and status regions, the DMA buffers and, on non-PCI
+ *      cards, the AGP texture region;
+ *   6. copy the frame buffer / texture heap layout out of the DDX private.
+ *
+ * \param sPriv  DRI screen private; sPriv->pDevPriv is read as the
+ *               RADEONDRIPtr supplied by the DDX.
+ * \return the new screen private, or NULL on any failure.  On failure
+ *         everything mapped or allocated so far is released again through
+ *         the single unwind path at the end of the function.
+ */
+static r200ScreenPtr 
+r200CreateScreen( __DRIscreenPrivate *sPriv )
+{
+   r200ScreenPtr screen;
+   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
+   drmRadeonGetParam gp;
+   int ret;
+
+   if ( ! driCheckDriDdxDrmVersions( sPriv, "R200", 4, 0, 4, 0, 1, 5 ) )
+      return NULL;
+
+   /* Allocate the private area.  CALLOC zeroes it, so fields that are
+    * only filled in conditionally below (agp_base, irq,
+    * drmSupportsCubeMaps, agpTextures) start out as 0.
+    */
+   screen = (r200ScreenPtr) CALLOC( sizeof(*screen) );
+   if ( !screen ) {
+      __driUtilMessage("%s: Could not allocate memory for screen structure",
+                       __FUNCTION__);
+      return NULL;
+   }
+
+   /* Every ID listed here is refused; anything else is treated as R200. */
+   switch ( dri_priv->deviceID ) {
+   case PCI_CHIP_R200_QD:
+   case PCI_CHIP_R200_QE:
+   case PCI_CHIP_R200_QF:
+   case PCI_CHIP_R200_QG:
+   case PCI_CHIP_R200_QY:
+   case PCI_CHIP_R200_QZ:
+   case PCI_CHIP_RV200_QW:
+   case PCI_CHIP_R200_LW:
+   case PCI_CHIP_R200_LY:
+   case PCI_CHIP_R200_LZ:
+      __driUtilMessage("r200CreateScreen(): Device isn't an r200!\n");
+      goto fail_free;
+   default:
+      screen->chipset = R200_CHIPSET_R200;
+      break;
+   }
+
+   /* This is first since which regions we map depends on whether or
+    * not we are using a PCI card.
+    */
+   screen->IsPCI = dri_priv->IsPCI;
+
+   gp.param = RADEON_PARAM_AGP_BUFFER_OFFSET;
+   gp.value = &screen->agp_buffer_offset;
+
+   ret = drmCommandWriteRead( sPriv->fd, DRM_RADEON_GETPARAM,
+                              &gp, sizeof(gp));
+   if (ret) {
+      fprintf(stderr, "drmRadeonGetParam (RADEON_PARAM_AGP_BUFFER_OFFSET): %d\n", ret);
+      goto fail_free;
+   }
+
+   /* The texture area is placed 2MB past the start of the buffer area. */
+   screen->agp_texture_offset = 
+      screen->agp_buffer_offset + 2*1024*1024;
+
+   if (sPriv->drmMinor >= 6) {
+      gp.param = RADEON_PARAM_AGP_BASE;
+      gp.value = &screen->agp_base;
+
+      ret = drmCommandWriteRead( sPriv->fd, DRM_RADEON_GETPARAM,
+                                 &gp, sizeof(gp));
+      if (ret) {
+         fprintf(stderr, "drmRadeonGetParam (RADEON_PARAM_AGP_BASE): %d\n", ret);
+         goto fail_free;
+      }
+
+      gp.param = RADEON_PARAM_IRQ_NR;
+      gp.value = &screen->irq;
+
+      ret = drmCommandWriteRead( sPriv->fd, DRM_RADEON_GETPARAM,
+                                 &gp, sizeof(gp));
+      if (ret) {
+         fprintf(stderr, "drmRadeonGetParam (RADEON_PARAM_IRQ_NR): %d\n", ret);
+         goto fail_free;
+      }
+
+      /* Check if kernel module is new enough to support cube maps */
+      screen->drmSupportsCubeMaps = (sPriv->drmMinor >= 7);
+   }
+
+   screen->mmio.handle = dri_priv->registerHandle;
+   screen->mmio.size   = dri_priv->registerSize;
+   if ( drmMap( sPriv->fd,
+                screen->mmio.handle,
+                screen->mmio.size,
+                &screen->mmio.map ) ) {
+      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+      goto fail_free;
+   }
+
+   screen->status.handle = dri_priv->statusHandle;
+   screen->status.size   = dri_priv->statusSize;
+   if ( drmMap( sPriv->fd,
+                screen->status.handle,
+                screen->status.size,
+                &screen->status.map ) ) {
+      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+      goto fail_unmap_mmio;
+   }
+   screen->scratch = (__volatile__ CARD32 *)
+      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+
+   screen->buffers = drmMapBufs( sPriv->fd );
+   if ( !screen->buffers ) {
+      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+      goto fail_unmap_status;
+   }
+
+   if ( !screen->IsPCI ) {
+      screen->agpTextures.handle = dri_priv->agpTexHandle;
+      screen->agpTextures.size   = dri_priv->agpTexMapSize;
+      if ( drmMap( sPriv->fd,
+                   screen->agpTextures.handle,
+                   screen->agpTextures.size,
+                   (drmAddressPtr)&screen->agpTextures.map ) ) {
+         /* This is the AGP texture mapping failing on a non-PCI card;
+          * say so instead of the old, misleading "IsPCI failed".
+          */
+         __driUtilMessage("%s: drmMap (AGP textures) failed\n", __FUNCTION__);
+         goto fail_unmap_bufs;
+      }
+   }
+
+   screen->cpp = dri_priv->bpp / 8;
+   screen->AGPMode = dri_priv->AGPMode;
+
+   screen->frontOffset = dri_priv->frontOffset;
+   screen->frontPitch  = dri_priv->frontPitch;
+   screen->backOffset  = dri_priv->backOffset;
+   screen->backPitch   = dri_priv->backPitch;
+   screen->depthOffset = dri_priv->depthOffset;
+   screen->depthPitch  = dri_priv->depthPitch;
+
+   screen->texOffset[RADEON_CARD_HEAP] = dri_priv->textureOffset;
+   screen->texSize[RADEON_CARD_HEAP] = dri_priv->textureSize;
+   screen->logTexGranularity[RADEON_CARD_HEAP] =
+      dri_priv->log2TexGran;
+
+   if ( screen->IsPCI ) {
+      /* No AGP heap on PCI cards: advertise one heap fewer and zero it. */
+      screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
+      screen->texOffset[RADEON_AGP_HEAP] = 0;
+      screen->texSize[RADEON_AGP_HEAP] = 0;
+      screen->logTexGranularity[RADEON_AGP_HEAP] = 0;
+   } else {
+      screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
+      screen->texOffset[RADEON_AGP_HEAP] =
+         dri_priv->agpTexOffset + R200_AGP_TEX_OFFSET;
+      screen->texSize[RADEON_AGP_HEAP] = dri_priv->agpTexMapSize;
+      screen->logTexGranularity[RADEON_AGP_HEAP] =
+         dri_priv->log2AGPTexGran;
+   }
+
+   screen->driScreen = sPriv;
+   screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
+   return screen;
+
+   /* Unwind in reverse order of acquisition; each label releases what
+    * had been set up before the failing step, then falls through.
+    */
+fail_unmap_bufs:
+   drmUnmapBufs( screen->buffers );
+fail_unmap_status:
+   drmUnmap( screen->status.map, screen->status.size );
+fail_unmap_mmio:
+   drmUnmap( screen->mmio.map, screen->mmio.size );
+fail_free:
+   FREE( screen );
+   return NULL;
+}
+
+/* Release the device specific screen private: unmap the AGP texture
+ * region (non-PCI cards only), the DMA buffers and the status and MMIO
+ * regions, then free the struct and clear sPriv->private.  Does nothing
+ * when no screen private is attached.
+ */
+static void 
+r200DestroyScreen( __DRIscreenPrivate *sPriv )
+{
+   r200ScreenPtr r200scrn = (r200ScreenPtr)sPriv->private;
+
+   if ( r200scrn != NULL ) {
+      if ( !r200scrn->IsPCI )
+         drmUnmap( r200scrn->agpTextures.map, r200scrn->agpTextures.size );
+
+      drmUnmapBufs( r200scrn->buffers );
+      drmUnmap( r200scrn->status.map, r200scrn->status.size );
+      drmUnmap( r200scrn->mmio.map, r200scrn->mmio.size );
+
+      FREE( r200scrn );
+      sPriv->private = NULL;
+   }
+}
+
+
+/* Build the driver specific screen private and hang it off sPriv.
+ * The result is also remembered in the file-level __r200Screen.
+ * Returns GL_TRUE when a screen private was created, GL_FALSE otherwise.
+ */
+static GLboolean
+r200InitDriver( __DRIscreenPrivate *sPriv )
+{
+   r200ScreenPtr created = r200CreateScreen( sPriv );
+
+   __r200Screen = created;
+   sPriv->private = (void *) created;
+
+   if ( sPriv->private == NULL )
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+
+
+/* Create the Mesa framebuffer for a drawable and store it in
+ * driDrawPriv->driverPrivate.
+ *
+ * Pixmaps are not implemented and always yield GL_FALSE.  For windows,
+ * software accumulation is requested when the visual has accum bits, and
+ * software stencil when the visual has stencil bits but a depth size
+ * other than 24; depth and alpha are never requested in software.
+ * Returns non-zero when the framebuffer was created.
+ */
+static GLboolean
+r200CreateBuffer( __DRIscreenPrivate *driScrnPriv,
+                  __DRIdrawablePrivate *driDrawPriv,
+                  const __GLcontextModes *mesaVis,
+                  GLboolean isPixmap )
+{
+   GLboolean wantSwStencil;
+   GLboolean wantSwAccum;
+
+   if (isPixmap)
+      return GL_FALSE; /* not implemented */
+
+   wantSwAccum   = mesaVis->accumRedBits > 0;
+   wantSwStencil = mesaVis->stencilBits > 0 && mesaVis->depthBits != 24;
+
+   driDrawPriv->driverPrivate = (void *)
+      _mesa_create_framebuffer( mesaVis,
+                                GL_FALSE,       /* software depth */
+                                wantSwStencil,
+                                wantSwAccum,
+                                GL_FALSE );     /* software alpha */
+
+   return (driDrawPriv->driverPrivate != NULL);
+}
+
+
+/* Destroy the Mesa framebuffer stored in driDrawPriv->driverPrivate. */
+static void
+r200DestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+{
+   GLframebuffer *fb = (GLframebuffer *) driDrawPriv->driverPrivate;
+
+   _mesa_destroy_framebuffer( fb );
+}
+
+
+
+
+/* Shared handler for both the OpenFullScreen and CloseFullScreen hooks.
+ *
+ * Fullscreen mode isn't used for much -- it could become a way to shrink
+ * the front/back buffers and gain texture memory when the client has
+ * changed the video resolution.  Pageflipping is now done automatically
+ * whenever there is a single 3d client, so nothing needs doing here and
+ * the call always reports success.
+ */
+static GLboolean
+r200OpenCloseFullScreen( __DRIcontextPrivate *driContextPriv )
+{
+   (void) driContextPriv;
+
+   return GL_TRUE;
+}
+
+/* Driver entry-point table handed to __driUtilCreateScreen() by
+ * __driCreateScreen().  OpenFullScreen and CloseFullScreen share one
+ * handler; WaitForSBC and SwapBuffersMSC are left unset (NULL).
+ */
+static struct __DriverAPIRec r200API = {
+   .InitDriver      = r200InitDriver,
+   .DestroyScreen   = r200DestroyScreen,
+   .CreateContext   = r200CreateContext,
+   .DestroyContext  = r200DestroyContext,
+   .CreateBuffer    = r200CreateBuffer,
+   .DestroyBuffer   = r200DestroyBuffer,
+   .SwapBuffers     = r200SwapBuffers,
+   .MakeCurrent     = r200MakeCurrent,
+   .UnbindContext   = r200UnbindContext,
+   .OpenFullScreen  = r200OpenCloseFullScreen,
+   .CloseFullScreen = r200OpenCloseFullScreen,
+   .GetSwapInfo     = getSwapInfo,
+   .GetMSC          = driGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+
+/*
+ * Bootstrap function for the driver; __driCreateScreen is the symbol
+ * that libGL.so fetches.  Both build flavours simply forward their
+ * arguments, plus the r200API table, to __driUtilCreateScreen().
+ * Return:  pointer to a __DRIscreenPrivate.
+ */
+#ifndef _SOLO
+void *__driCreateScreen(Display *dpy, int scrn, __DRIscreen *psc,
+                        int numConfigs, __GLXvisualConfig *config)
+{
+   __DRIscreenPrivate *created =
+      __driUtilCreateScreen(dpy, scrn, psc, numConfigs, config, &r200API);
+
+   return (void *) created;
+}
+#else
+void *__driCreateScreen(struct DRIDriverRec *driver,
+                        struct DRIDriverContextRec *driverContext)
+{
+   __DRIscreenPrivate *created =
+      __driUtilCreateScreen(driver, driverContext, &r200API);
+
+   return (void *) created;
+}
+#endif
+
+
+
+#ifndef _SOLO
+/* Called by libGL.so so the driver can dynamically extend libGL with new
+ * GLX and/or GL functions.  Note that _mesa_create_context() will
+ * probably add most of the newer OpenGL extension functions into the
+ * dispatcher.
+ *
+ * Nothing is done unless driCompareGLXAPIVersion( 20030317 ) is
+ * non-negative and libGL exports __glXEnableExtension.  The NV/MESA
+ * memory functions are registered, and their extensions enabled, only
+ * when libGL also exports __glXRegisterGLXFunction.
+ */
+void
+__driRegisterExtensions( void )
+{
+   typedef void *(*registerFunc)(const char *funcName, void *funcAddr);
+   PFNGLXENABLEEXTENSIONPROC enableExt;
+   registerFunc registerGLXFunc;
+
+   if ( driCompareGLXAPIVersion( 20030317 ) < 0 )
+      return;
+
+   enableExt = (PFNGLXENABLEEXTENSIONPROC)
+      glXGetProcAddress( "__glXEnableExtension" );
+   if ( enableExt == NULL )
+      return;
+
+   enableExt( "GLX_SGI_swap_control", GL_FALSE );
+   enableExt( "GLX_SGI_video_sync", GL_FALSE );
+   enableExt( "GLX_MESA_swap_control", GL_FALSE );
+   enableExt( "GLX_MESA_swap_frame_usage", GL_FALSE );
+
+   /* Look up libGL's __glXRegisterGLXFunction, if it exists. */
+   registerGLXFunc = (registerFunc)
+      glXGetProcAddress( "__glXRegisterGLXFunction" );
+   if ( !registerGLXFunc )
+      return;
+
+   /* Register our GLX extension functions with libGL.  A non-NULL return
+    * means the name was already registered; as before, that case is
+    * ignored (XXX what to do, wrap?).
+    */
+   (void) registerGLXFunc("glXAllocateMemoryNV", (void *) r200AllocateMemoryNV);
+   (void) registerGLXFunc("glXFreeMemoryNV", (void *) r200FreeMemoryNV);
+   (void) registerGLXFunc("glXGetAGPOffsetMESA", (void *) r200GetAGPOffset);
+
+   enableExt( "GLX_NV_vertex_array_range", GL_TRUE );
+   enableExt( "GLX_MESA_agp_offset", GL_TRUE );
+}
+#endif
+
+/**
+ * Report the swap statistics of the context bound to a drawable.
+ *
+ * Copies swap_count, swap_ust and swap_missed_count from the r200 context
+ * into \p sInfo.  swap_missed_usage comes from driCalculateSwapUsage()
+ * when at least one swap was missed and is 0.0 otherwise.
+ *
+ * \return 0 on success, -1 when \p dPriv, its context, the context's
+ *         driver private or \p sInfo is NULL.
+ */
+static int
+getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
+{
+   r200ContextPtr r200;
+
+   if ( !dPriv || !dPriv->driContextPriv )
+      return -1;
+   if ( !dPriv->driContextPriv->driverPrivate || !sInfo )
+      return -1;
+
+   r200 = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   sInfo->swap_count        = r200->swap_count;
+   sInfo->swap_ust          = r200->swap_ust;
+   sInfo->swap_missed_count = r200->swap_missed_count;
+
+   if ( sInfo->swap_missed_count != 0 )
+      sInfo->swap_missed_usage =
+         driCalculateSwapUsage( dPriv, 0, r200->swap_missed_ust );
+   else
+      sInfo->swap_missed_usage = 0.0;
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_screen.h b/src/mesa/drivers/dri/r200/r200_screen.h
new file mode 100644 (file)
index 0000000..1e1f125
--- /dev/null
@@ -0,0 +1,99 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_SCREEN_H__
+#define __R200_SCREEN_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "dri_util.h"
+#include "xf86drm.h"
+#include "radeon_common.h"
+#include "radeon_sarea.h"
+
+/* One DRM-mapped region: the handle and size used to map it and the
+ * resulting address.
+ */
+typedef struct {
+   drmHandle handle;                   /* Handle to the DRM region */
+   drmSize size;                       /* Size of the DRM region */
+   drmAddress map;                     /* Mapping of the DRM region */
+} r200RegionRec, *r200RegionPtr;
+
+/* Values for r200ScreenRec::chipset. */
+#define R200_CHIPSET_R200   1
+#define R200_CHIPSET_MOBILITY 2
+
+
+/* Size of the per-heap arrays in r200ScreenRec.
+ * NOTE(review): r200_screen.c indexes those arrays with RADEON_CARD_HEAP /
+ * RADEON_AGP_HEAP and sets numTexHeaps from RADEON_NR_TEX_HEAPS, so this
+ * presumably has to stay equal to RADEON_NR_TEX_HEAPS -- confirm.
+ */
+#define R200_NR_TEX_HEAPS 2
+
+/* Per-screen driver private data. */
+typedef struct {
+
+   int chipset;
+   int cpp;
+   int IsPCI;                          /* Current card is a PCI card */
+   int AGPMode;
+   unsigned int irq;                   /* IRQ number (0 means none) */
+
+   unsigned int frontOffset;
+   unsigned int frontPitch;
+   unsigned int backOffset;
+   unsigned int backPitch;
+
+   unsigned int depthOffset;
+   unsigned int depthPitch;
+
+    /* Shared texture data */
+   int numTexHeaps;
+   int texOffset[R200_NR_TEX_HEAPS];
+   int texSize[R200_NR_TEX_HEAPS];
+   int logTexGranularity[R200_NR_TEX_HEAPS];
+
+   r200RegionRec mmio;
+   r200RegionRec status;
+   r200RegionRec agpTextures;
+
+   drmBufMapPtr buffers;
+
+   __volatile__ CARD32 *scratch;
+
+   __DRIscreenPrivate *driScreen;
+   unsigned int sarea_priv_offset;
+   unsigned int agp_buffer_offset;     /* offset in card memory space */
+   unsigned int agp_texture_offset;    /* offset in card memory space */
+   unsigned int agp_base;
+
+   GLboolean drmSupportsCubeMaps;       /* need radeon kernel module >=1.7 */
+} r200ScreenRec, *r200ScreenPtr;
+
+#endif
+#endif /* __R200_SCREEN_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_span.c b/src/mesa/drivers/dri/r200/r200_span.c
new file mode 100644 (file)
index 0000000..b469aec
--- /dev/null
@@ -0,0 +1,433 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "swrast/swrast.h"
+#include "colormac.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_span.h"
+#include "r200_tex.h"
+
+#define DBG 0
+
+/* Locals declared at the top of every colour span function.  Expects a
+ * GLcontext pointer named ctx in scope.  buf and read_buf point at the
+ * drawable's origin inside the frame buffer mapping (sPriv->pFB), offset
+ * by the current draw and read buffer offsets respectively.  Note that
+ * pitch (in bytes) is always derived from the front buffer's pitch,
+ * whichever buffer is selected.  The trailing (void) casts silence
+ * unused-variable warnings.
+ */
+#define LOCAL_VARS                                                     \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);                   \
+   r200ScreenPtr r200Screen = rmesa->r200Screen;                       \
+   __DRIscreenPrivate *sPriv = rmesa->dri.screen;                      \
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;                  \
+   GLuint pitch = r200Screen->frontPitch * r200Screen->cpp;            \
+   GLuint height = dPriv->h;                                           \
+   char *buf = (char *)(sPriv->pFB +                                   \
+                       rmesa->state.color.drawOffset +                 \
+                       (dPriv->x * r200Screen->cpp) +          \
+                       (dPriv->y * pitch));                            \
+   char *read_buf = (char *)(sPriv->pFB +                              \
+                            rmesa->state.pixel.readOffset +            \
+                            (dPriv->x * r200Screen->cpp) +             \
+                            (dPriv->y * pitch));                       \
+   GLuint p;                                                           \
+   (void) read_buf; (void) buf; (void) p
+
+/* Locals declared at the top of every depth span function.  Unlike
+ * LOCAL_VARS, buf points at the start of the depth buffer and the
+ * drawable origin is kept separately in xo/yo, to be added to each
+ * coordinate before it is turned into an address.
+ */
+#define LOCAL_DEPTH_VARS                                               \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);                   \
+   r200ScreenPtr r200Screen = rmesa->r200Screen;                       \
+   __DRIscreenPrivate *sPriv = rmesa->dri.screen;                      \
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;                  \
+   GLuint height = dPriv->h;                                           \
+   GLuint xo = dPriv->x;                                               \
+   GLuint yo = dPriv->y;                                               \
+   char *buf = (char *)(sPriv->pFB + r200Screen->depthOffset); \
+   (void) buf
+
+/* Stencil span functions use the same locals as the depth ones. */
+#define LOCAL_STENCIL_VARS     LOCAL_DEPTH_VARS
+
+
+#define CLIPPIXEL( _x, _y )                                            \
+   ((_x >= minx) && (_x < maxx) && (_y >= miny) && (_y < maxy))
+
+
+#define CLIPSPAN( _x, _y, _n, _x1, _n1, _i )                           \
+   if ( _y < miny || _y >= maxy ) {                                    \
+      _n1 = 0, _x1 = x;                                                        \
+   } else {                                                            \
+      _n1 = _n;                                                                \
+      _x1 = _x;                                                                \
+      if ( _x1 < minx ) _i += (minx-_x1), n1 -= (minx-_x1), _x1 = minx; \
+      if ( _x1 + _n1 >= maxx ) n1 -= (_x1 + n1 - maxx);                        \
+   }
+
+#define Y_FLIP( _y )           (height - _y - 1)
+
+
+/* Intentionally empty: no locking is done per span call. */
+#define HW_LOCK() 
+
+/* Opens a loop over the drawable's clip rectangles, walking them from
+ * the last to the first.  For each one, minx/miny/maxx/maxy hold the
+ * rectangle translated to drawable-relative coordinates.  Expects rmesa
+ * in scope and must be closed with HW_ENDCLIPLOOP().
+ */
+#define HW_CLIPLOOP()                                                  \
+   do {                                                                        \
+      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;               \
+      int _nc = dPriv->numClipRects;                                   \
+                                                                       \
+      while ( _nc-- ) {                                                        \
+        int minx = dPriv->pClipRects[_nc].x1 - dPriv->x;               \
+        int miny = dPriv->pClipRects[_nc].y1 - dPriv->y;               \
+        int maxx = dPriv->pClipRects[_nc].x2 - dPriv->x;               \
+        int maxy = dPriv->pClipRects[_nc].y2 - dPriv->y;
+
+/* Closes the while and do blocks opened by HW_CLIPLOOP(). */
+#define HW_ENDCLIPLOOP()                                               \
+      }                                                                        \
+   } while (0)
+
+/* Intentionally empty, matching HW_LOCK(). */
+#define HW_UNLOCK()                                                    
+
+
+
+/* ================================================================
+ * Color buffer
+ */
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ *
+ * The macros below parameterise spantmp.h, which is included once per
+ * pixel format; TAG() gives the generated functions an _RGB565 suffix.
+ * All of them use buf / read_buf / pitch from LOCAL_VARS and address a
+ * pixel as 2 bytes per x step plus pitch bytes per y step.
+ */
+
+/* Pack a colour array (red, green, blue in elements 0..2) into p. */
+#define INIT_MONO_PIXEL(p, color) \
+  p = PACK_COLOR_565( color[0], color[1], color[2] )
+
+/* Store r/g/b as 5/6/5 bits using their top bits; a is ignored. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )                               \
+   *(GLushort *)(buf + _x*2 + _y*pitch) = ((((int)r & 0xf8) << 8) |    \
+                                          (((int)g & 0xfc) << 3) |     \
+                                          (((int)b & 0xf8) >> 3))
+
+/* Store an already packed pixel value. */
+#define WRITE_PIXEL( _x, _y, p )                                       \
+   *(GLushort *)(buf + _x*2 + _y*pitch) = p
+
+/* Read a pixel and rescale each channel so that its maximum maps to 255;
+ * alpha is always reported as 0xff.
+ */
+#define READ_RGBA( rgba, _x, _y )                                      \
+   do {                                                                        \
+      GLushort p = *(GLushort *)(read_buf + _x*2 + _y*pitch);          \
+      rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;                                \
+      rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;                                \
+      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;                                \
+      rgba[3] = 0xff;                                                  \
+   } while (0)
+
+#define TAG(x) r200##x##_RGB565
+#include "spantmp.h"
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ *
+ * Second instantiation of spantmp.h, this time with 4 bytes per pixel;
+ * TAG() gives the generated functions an _ARGB8888 suffix.
+ * NOTE(review): only INIT_MONO_PIXEL is #undef'd here, so spantmp.h
+ * presumably undefines the other macros itself at its end -- confirm.
+ */
+#undef INIT_MONO_PIXEL
+/* Pack a colour array into p, alpha (element 3) first. */
+#define INIT_MONO_PIXEL(p, color) \
+  p = PACK_COLOR_8888( color[3], color[0], color[1], color[2] )
+
+/* Store blue in bits 0-7, green in 8-15, red in 16-23, alpha in 24-31. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )                       \
+do {                                                           \
+   *(GLuint *)(buf + _x*4 + _y*pitch) = ((b <<  0) |           \
+                                        (g <<  8) |            \
+                                        (r << 16) |            \
+                                        (a << 24) );           \
+} while (0)
+
+/* Store an already packed pixel value. */
+#define WRITE_PIXEL( _x, _y, p )                       \
+do {                                                   \
+   *(GLuint *)(buf + _x*4 + _y*pitch) = p;             \
+} while (0)
+
+/* Read a pixel through a volatile pointer and split it into
+ * rgba[0..3] = red, green, blue, alpha.
+ */
+#define READ_RGBA( rgba, _x, _y )                              \
+do {                                                           \
+   volatile GLuint *ptr = (volatile GLuint *)(read_buf + _x*4 + _y*pitch); \
+   GLuint p = *ptr;                                    \
+   rgba[0] = (p >> 16) & 0xff;                                 \
+   rgba[1] = (p >>  8) & 0xff;                                 \
+   rgba[2] = (p >>  0) & 0xff;                                 \
+   rgba[3] = (p >> 24) & 0xff;                                 \
+} while (0)
+
+#define TAG(x) r200##x##_ARGB8888
+#include "spantmp.h"
+
+
+
+/* ================================================================
+ * Depth buffer
+ */
+
+/* The Radeon family has depth tiling on all the time, so we have to convert
+ * the x,y coordinates into the memory bus address (mba) in the same
+ * manner as the engine.  In each case, the linear block address (ba)
+ * is calculated, and then wired with x and y to produce the final
+ * memory address.
+ */
+
+#define BIT(x,b) ((x & (1<<b))>>b)
+/* Byte offset of the 32-bit depth/stencil word for screen pixel (x, y).
+ *
+ * The linear block address is formed from y / 16 and x / 32 using the
+ * front buffer pitch; the low address bits are then assembled from
+ * individual bits of x and y, and the block address supplies bits 11 and
+ * up.  Bit 11 is the block's low bit, XORed with bit 4 of y unless bit 5
+ * of the pitch is set.
+ */
+static GLuint r200_mba_z32( r200ContextPtr rmesa,
+                                      GLint x, GLint y )
+{
+   const GLuint pitch = rmesa->r200Screen->frontPitch;
+   const GLuint block =
+      ((y & 0x3FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x3FF) >> 5);
+   GLuint block_lsb = block & 0x01;
+   GLuint addr;
+
+   if ( !(pitch & 0x20) )
+      block_lsb ^= BIT(y,4);
+
+   addr  = BIT(x,0) << 2;
+   addr |= BIT(y,0) << 3;
+   addr |= BIT(x,1) << 4;
+   addr |= BIT(y,1) << 5;
+   addr |= BIT(x,3) << 6;
+   addr |= BIT(x,4) << 7;
+   addr |= BIT(x,2) << 8;
+   addr |= BIT(y,2) << 9;
+   addr |= BIT(y,3) << 10;
+   addr |= block_lsb << 11;
+   addr |= (block >> 1) << 12;
+
+   return addr;
+}
+
+/* Byte offset of the 16-bit depth value for screen pixel (x, y).
+ *
+ * Same scheme as the 32-bit variant, but the block address is formed
+ * from y / 16 and x / 64, the x/y bits are wired to different positions,
+ * and bit 6 of the pitch decides whether bit 4 of y is XORed into
+ * address bit 11.
+ */
+static GLuint r200_mba_z16( r200ContextPtr rmesa, GLint x, GLint y )
+{
+   const GLuint pitch = rmesa->r200Screen->frontPitch;
+   const GLuint block =
+      ((y & 0x3FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x3FF) >> 6);
+   GLuint block_lsb = block & 0x01;
+   GLuint addr;
+
+   if ( !(pitch & 0x40) )
+      block_lsb ^= BIT(y,4);
+
+   addr  = BIT(x,0) << 1;
+   addr |= BIT(y,0) << 2;
+   addr |= BIT(x,1) << 3;
+   addr |= BIT(y,1) << 4;
+   addr |= BIT(x,2) << 5;
+   addr |= BIT(x,4) << 6;
+   addr |= BIT(x,5) << 7;
+   addr |= BIT(x,3) << 8;
+   addr |= BIT(y,2) << 9;
+   addr |= BIT(y,3) << 10;
+   addr |= block_lsb << 11;
+   addr |= (block >> 1) << 12;
+
+   return addr;
+}
+
+
+/* 16-bit depth buffer functions
+ *
+ * Parameters for depthtmp.h; TAG() gives the generated functions a _16
+ * suffix.  Coordinates are offset by the drawable origin (xo, yo from
+ * LOCAL_DEPTH_VARS) and converted to a byte offset by r200_mba_z16().
+ * Note that both macro bodies already end in a semicolon.
+ */
+#define WRITE_DEPTH( _x, _y, d )                                       \
+   *(GLushort *)(buf + r200_mba_z16( rmesa, _x + xo, _y + yo )) = d;
+
+#define READ_DEPTH( d, _x, _y )                                                \
+   d = *(GLushort *)(buf + r200_mba_z16( rmesa, _x + xo, _y + yo ));
+
+#define TAG(x) r200##x##_16
+#include "depthtmp.h"
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ *
+ * Parameters for depthtmp.h; TAG() gives the generated functions a _24_8
+ * suffix.  Depth occupies the low 24 bits of each 32-bit word located by
+ * r200_mba_z32().
+ */
+
+/* Read-modify-write: keep the top 8 bits of the word, replace the low 24
+ * with d.
+ */
+#define WRITE_DEPTH( _x, _y, d )                                       \
+do {                                                                   \
+   GLuint offset = r200_mba_z32( rmesa, _x + xo, _y + yo );            \
+   GLuint tmp = *(GLuint *)(buf + offset);                             \
+   tmp &= 0xff000000;                                                  \
+   tmp |= ((d) & 0x00ffffff);                                          \
+   *(GLuint *)(buf + offset) = tmp;                                    \
+} while (0)
+
+/* Fetch the low 24 bits of the word; the macro body already ends in a
+ * semicolon.
+ */
+#define READ_DEPTH( d, _x, _y )                                                \
+   d = *(GLuint *)(buf + r200_mba_z32( rmesa, _x + xo,         \
+                                        _y + yo )) & 0x00ffffff;
+
+#define TAG(x) r200##x##_24_8
+#include "depthtmp.h"
+
+
+/* ================================================================
+ * Stencil buffer
+ */
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ *
+ * Parameters for stenciltmp.h; TAG() gives the generated functions a
+ * _24_8 suffix.  Stencil occupies the top 8 bits of the same 32-bit word
+ * that holds the 24-bit depth value.
+ */
+
+/* Read-modify-write: keep the low 24 bits of the word, replace the top 8
+ * with the low byte of d.
+ */
+#define WRITE_STENCIL( _x, _y, d )                                     \
+do {                                                                   \
+   GLuint offset = r200_mba_z32( rmesa, _x + xo, _y + yo );            \
+   GLuint tmp = *(GLuint *)(buf + offset);                             \
+   tmp &= 0x00ffffff;                                                  \
+   tmp |= (((d) & 0xff) << 24);                                                \
+   *(GLuint *)(buf + offset) = tmp;                                    \
+} while (0)
+
+/* Fetch the top 8 bits of the word into d. */
+#define READ_STENCIL( d, _x, _y )                                      \
+do {                                                                   \
+   GLuint offset = r200_mba_z32( rmesa, _x + xo, _y + yo );            \
+   GLuint tmp = *(GLuint *)(buf + offset);                             \
+   tmp &= 0xff000000;                                                  \
+   d = tmp >> 24;                                                      \
+} while (0)
+
+#define TAG(x) r200##x##_24_8
+#include "stenciltmp.h"
+
+
+/*
+ * Select which colour buffer the software rasterizer (swrast) fallbacks
+ * read from and write to.  This doesn't necessarily correspond to
+ * glDrawBuffer() or glReadBuffer() calls.
+ *
+ * Read and draw offset/pitch are always set to the same buffer.  While
+ * page flipping is active and page 1 is current, front and back swap
+ * roles.  Any bufferBit other than FRONT_LEFT_BIT or BACK_LEFT_BIT is
+ * reported through _mesa_problem() and leaves the state untouched.
+ */
+static void r200SetBuffer( GLcontext *ctx,
+                           GLframebuffer *colorBuffer,
+                           GLuint bufferBit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLboolean flipped;
+   GLboolean useBack;
+
+   if ( bufferBit != FRONT_LEFT_BIT && bufferBit != BACK_LEFT_BIT ) {
+      _mesa_problem(ctx, "Bad bufferBit in %s", __FUNCTION__);
+      return;
+   }
+
+   flipped = rmesa->doPageFlip && rmesa->sarea->pfCurrentPage == 1;
+   useBack = (bufferBit == BACK_LEFT_BIT) ? !flipped : flipped;
+
+   if ( useBack ) {
+      rmesa->state.pixel.readOffset = rmesa->r200Screen->backOffset;
+      rmesa->state.pixel.readPitch  = rmesa->r200Screen->backPitch;
+      rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
+      rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
+   }
+   else {
+      rmesa->state.pixel.readOffset = rmesa->r200Screen->frontOffset;
+      rmesa->state.pixel.readPitch  = rmesa->r200Screen->frontPitch;
+      rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
+      rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
+   }
+}
+
+/* Move locking out to get reasonable span performance (10x better
+ * than doing this in HW_LOCK above).  WaitForIdle() is the main
+ * culprit.
+ */
+
+static void r200SpanRenderStart( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   volatile int *first_pixel;
+   int value;
+
+   /* Flush queued vertices, take the hardware lock and wait for the
+    * engine to go idle before touching the framebuffer directly.
+    */
+   R200_FIREVERTICES( rmesa );
+   LOCK_HARDWARE( rmesa );
+   r200WaitForIdleLocked( rmesa );
+
+   /* Read the first pixel of the read buffer and write the same value
+    * back.  It looks like a no-op, but without it conform fails:
+    * framebuffer reads sometimes return old data because the on-card
+    * read cache does not notice that the framebuffer was updated.
+    *
+    * In the worst case this is buggy too, since the very first read may
+    * itself return the wrong value; a hidden pixel somewhere would
+    * really be needed for this.
+    */
+   first_pixel = (volatile int *)(rmesa->dri.screen->pFB +
+                                  rmesa->state.pixel.readOffset);
+   value = *first_pixel;
+   *first_pixel = value;
+}
+
+/* Counterpart of r200SpanRenderStart(): flushes swrast via
+ * _swrast_flush() and then releases the hardware lock that
+ * r200SpanRenderStart() took.  The flush must come first so that it
+ * still runs with the lock held.
+ */
+static void r200SpanRenderFinish( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   _swrast_flush( ctx );
+   UNLOCK_HARDWARE( rmesa );
+}
+
+/* Install the r200 span read/write callbacks in the swrast device driver
+ * table of ctx.  Color functions are picked from the screen's bytes per
+ * pixel, depth/stencil functions from the visual's depth bits; any other
+ * value leaves the corresponding entries untouched.
+ */
+void r200InitSpanFuncs( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+
+   /* Entries that do not depend on the pixel format. */
+   swdd->SetBuffer        = r200SetBuffer;
+   swdd->SpanRenderStart  = r200SpanRenderStart;
+   swdd->SpanRenderFinish = r200SpanRenderFinish;
+
+   /* Color buffer access. */
+   if ( rmesa->r200Screen->cpp == 2 ) {
+      swdd->WriteRGBASpan       = r200WriteRGBASpan_RGB565;
+      swdd->WriteRGBSpan        = r200WriteRGBSpan_RGB565;
+      swdd->WriteMonoRGBASpan   = r200WriteMonoRGBASpan_RGB565;
+      swdd->WriteRGBAPixels     = r200WriteRGBAPixels_RGB565;
+      swdd->WriteMonoRGBAPixels = r200WriteMonoRGBAPixels_RGB565;
+      swdd->ReadRGBASpan        = r200ReadRGBASpan_RGB565;
+      swdd->ReadRGBAPixels      = r200ReadRGBAPixels_RGB565;
+   }
+   else if ( rmesa->r200Screen->cpp == 4 ) {
+      swdd->WriteRGBASpan       = r200WriteRGBASpan_ARGB8888;
+      swdd->WriteRGBSpan        = r200WriteRGBSpan_ARGB8888;
+      swdd->WriteMonoRGBASpan   = r200WriteMonoRGBASpan_ARGB8888;
+      swdd->WriteRGBAPixels     = r200WriteRGBAPixels_ARGB8888;
+      swdd->WriteMonoRGBAPixels = r200WriteMonoRGBAPixels_ARGB8888;
+      swdd->ReadRGBASpan        = r200ReadRGBASpan_ARGB8888;
+      swdd->ReadRGBAPixels      = r200ReadRGBAPixels_ARGB8888;
+   }
+
+   /* Depth (and, for the 24/8 layout, stencil) buffer access. */
+   if ( rmesa->glCtx->Visual.depthBits == 16 ) {
+      swdd->ReadDepthSpan    = r200ReadDepthSpan_16;
+      swdd->WriteDepthSpan   = r200WriteDepthSpan_16;
+      swdd->ReadDepthPixels  = r200ReadDepthPixels_16;
+      swdd->WriteDepthPixels = r200WriteDepthPixels_16;
+   }
+   else if ( rmesa->glCtx->Visual.depthBits == 24 ) {
+      swdd->ReadDepthSpan    = r200ReadDepthSpan_24_8;
+      swdd->WriteDepthSpan   = r200WriteDepthSpan_24_8;
+      swdd->ReadDepthPixels  = r200ReadDepthPixels_24_8;
+      swdd->WriteDepthPixels = r200WriteDepthPixels_24_8;
+
+      swdd->ReadStencilSpan    = r200ReadStencilSpan_24_8;
+      swdd->WriteStencilSpan   = r200WriteStencilSpan_24_8;
+      swdd->ReadStencilPixels  = r200ReadStencilPixels_24_8;
+      swdd->WriteStencilPixels = r200WriteStencilPixels_24_8;
+   }
+}
diff --git a/src/mesa/drivers/dri/r200/r200_span.h b/src/mesa/drivers/dri/r200/r200_span.h
new file mode 100644 (file)
index 0000000..269ac05
--- /dev/null
@@ -0,0 +1,45 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_SPAN_H__
+#define __R200_SPAN_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void r200InitSpanFuncs( GLcontext *ctx );
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
new file mode 100644 (file)
index 0000000..55a00cd
--- /dev/null
@@ -0,0 +1,2175 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "api_arrayelt.h"
+#include "enums.h"
+#include "colormac.h"
+
+#include "swrast/swrast.h"
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_tcl.h"
+#include "r200_tex.h"
+#include "r200_swtcl.h"
+#include "r200_vtxfmt.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+/* Program the alpha test: translate the GL comparison func to the
+ * hardware test op and store the reference value, clamped to a byte,
+ * in CTX_PP_MISC.  An unrecognised func leaves the test-op field zero.
+ */
+static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
+   GLuint test_op = 0;
+   GLubyte refByte;
+
+   CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   switch ( func ) {
+   case GL_NEVER:    test_op = R200_ALPHA_TEST_FAIL;    break;
+   case GL_LESS:     test_op = R200_ALPHA_TEST_LESS;    break;
+   case GL_EQUAL:    test_op = R200_ALPHA_TEST_EQUAL;   break;
+   case GL_LEQUAL:   test_op = R200_ALPHA_TEST_LEQUAL;  break;
+   case GL_GREATER:  test_op = R200_ALPHA_TEST_GREATER; break;
+   case GL_NOTEQUAL: test_op = R200_ALPHA_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   test_op = R200_ALPHA_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   test_op = R200_ALPHA_TEST_PASS;    break;
+   }
+
+   /* Replace both the test-op and the reference fields in one go. */
+   pp_misc &= ~(R200_ALPHA_TEST_OP_MASK | R200_REF_ALPHA_MASK);
+   pp_misc |= (refByte & R200_REF_ALPHA_MASK) | test_op;
+
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
+}
+
+/* Select the hardware blend combine function for the GL blend equation
+ * and bring the ROP enable bit in line with ctx->Color.ColorLogicOpEnabled.
+ * An unrecognised mode leaves the combine-function field cleared.
+ */
+static void r200BlendEquation( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint blendcntl = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~R200_COMB_FCN_MASK;
+
+   if ( mode == GL_FUNC_ADD || mode == GL_LOGIC_OP )
+      blendcntl |= R200_COMB_FCN_ADD_CLAMP;
+   else if ( mode == GL_FUNC_SUBTRACT )
+      blendcntl |= R200_COMB_FCN_SUB_CLAMP;
+   else if ( mode == GL_FUNC_REVERSE_SUBTRACT )
+      blendcntl |= R200_COMB_FCN_RSUB_CLAMP;
+   else if ( mode == GL_MIN )
+      blendcntl |= R200_COMB_FCN_MIN;
+   else if ( mode == GL_MAX )
+      blendcntl |= R200_COMB_FCN_MAX;
+
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = blendcntl;
+
+   if ( ctx->Color.ColorLogicOpEnabled )
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
+}
+
+/* Translate a GL source blend factor to its RB3D_BLENDCNTL source field
+ * bits.  Unrecognised factors yield 0.
+ */
+static GLuint r200_src_blend_bits( GLenum factor )
+{
+   switch ( factor ) {
+   case GL_ZERO:                     return R200_SRC_BLEND_GL_ZERO;
+   case GL_ONE:                      return R200_SRC_BLEND_GL_ONE;
+   case GL_DST_COLOR:                return R200_SRC_BLEND_GL_DST_COLOR;
+   case GL_ONE_MINUS_DST_COLOR:      return R200_SRC_BLEND_GL_ONE_MINUS_DST_COLOR;
+   case GL_SRC_COLOR:                return R200_SRC_BLEND_GL_SRC_COLOR;
+   case GL_ONE_MINUS_SRC_COLOR:      return R200_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR;
+   case GL_SRC_ALPHA:                return R200_SRC_BLEND_GL_SRC_ALPHA;
+   case GL_ONE_MINUS_SRC_ALPHA:      return R200_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+   case GL_DST_ALPHA:                return R200_SRC_BLEND_GL_DST_ALPHA;
+   case GL_ONE_MINUS_DST_ALPHA:      return R200_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA;
+   case GL_SRC_ALPHA_SATURATE:       return R200_SRC_BLEND_GL_SRC_ALPHA_SATURATE;
+   case GL_CONSTANT_COLOR:           return R200_SRC_BLEND_GL_CONST_COLOR;
+   case GL_ONE_MINUS_CONSTANT_COLOR: return R200_SRC_BLEND_GL_ONE_MINUS_CONST_COLOR;
+   case GL_CONSTANT_ALPHA:           return R200_SRC_BLEND_GL_CONST_ALPHA;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: return R200_SRC_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+   default:                          return 0;
+   }
+}
+
+/* Translate a GL destination blend factor to its RB3D_BLENDCNTL
+ * destination field bits.  Unrecognised factors yield 0.
+ */
+static GLuint r200_dst_blend_bits( GLenum factor )
+{
+   switch ( factor ) {
+   case GL_ZERO:                     return R200_DST_BLEND_GL_ZERO;
+   case GL_ONE:                      return R200_DST_BLEND_GL_ONE;
+   case GL_SRC_COLOR:                return R200_DST_BLEND_GL_SRC_COLOR;
+   case GL_ONE_MINUS_SRC_COLOR:      return R200_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
+   case GL_SRC_ALPHA:                return R200_DST_BLEND_GL_SRC_ALPHA;
+   case GL_ONE_MINUS_SRC_ALPHA:      return R200_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+   case GL_DST_COLOR:                return R200_DST_BLEND_GL_DST_COLOR;
+   case GL_ONE_MINUS_DST_COLOR:      return R200_DST_BLEND_GL_ONE_MINUS_DST_COLOR;
+   case GL_DST_ALPHA:                return R200_DST_BLEND_GL_DST_ALPHA;
+   case GL_ONE_MINUS_DST_ALPHA:      return R200_DST_BLEND_GL_ONE_MINUS_DST_ALPHA;
+   case GL_CONSTANT_COLOR:           return R200_DST_BLEND_GL_CONST_COLOR;
+   case GL_ONE_MINUS_CONSTANT_COLOR: return R200_DST_BLEND_GL_ONE_MINUS_CONST_COLOR;
+   case GL_CONSTANT_ALPHA:           return R200_DST_BLEND_GL_CONST_ALPHA;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: return R200_DST_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+   default:                          return 0;
+   }
+}
+
+/* Program the source and destination blend factors.  The factors are
+ * taken from ctx->Color.BlendSrcRGB / BlendDstRGB rather than from the
+ * sfactor / dfactor arguments, which are not read.
+ */
+static void r200BlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint blendcntl = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] &
+      ~(R200_SRC_BLEND_MASK | R200_DST_BLEND_MASK);
+
+   blendcntl |= r200_src_blend_bits( ctx->Color.BlendSrcRGB );
+   blendcntl |= r200_dst_blend_bits( ctx->Color.BlendDstRGB );
+
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = blendcntl;
+}
+
+/* Separate RGB/alpha blend factors: only the RGB pair is forwarded to
+ * r200BlendFunc(); sfactorA and dfactorA are ignored here.
+ */
+static void r200BlendFuncSeparate( GLcontext *ctx,
+                                    GLenum sfactorRGB, GLenum dfactorRGB,
+                                    GLenum sfactorA, GLenum dfactorA )
+{
+   r200BlendFunc( ctx, sfactorRGB, dfactorRGB );
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+/* Program the depth comparison function.  The value used is
+ * ctx->Depth.Func; the func argument is not read.  An unrecognised
+ * function leaves the Z test field cleared.
+ */
+static void r200DepthFunc( GLcontext *ctx, GLenum func )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint z_test = 0;
+
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_Z_TEST_MASK;
+
+   switch ( ctx->Depth.Func ) {
+   case GL_NEVER:    z_test = R200_Z_TEST_NEVER;   break;
+   case GL_LESS:     z_test = R200_Z_TEST_LESS;    break;
+   case GL_EQUAL:    z_test = R200_Z_TEST_EQUAL;   break;
+   case GL_LEQUAL:   z_test = R200_Z_TEST_LEQUAL;  break;
+   case GL_GREATER:  z_test = R200_Z_TEST_GREATER; break;
+   case GL_NOTEQUAL: z_test = R200_Z_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   z_test = R200_Z_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   z_test = R200_Z_TEST_ALWAYS;  break;
+   }
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= z_test;
+}
+
+
+/* Enable or disable depth buffer writes according to ctx->Depth.Mask
+ * (the flag argument is not read).
+ */
+static void r200DepthMask( GLcontext *ctx, GLboolean flag )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint zstencil;
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   zstencil = rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL];
+   zstencil = ctx->Depth.Mask ? (zstencil |  R200_Z_WRITE_ENABLE)
+                              : (zstencil & ~R200_Z_WRITE_ENABLE);
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = zstencil;
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+
+/* Fog state callback.  Depending on pname this updates the TCL fog mode
+ * bits, the packed fog color, and/or the two float constants kept (as
+ * raw bit patterns) in the fog atom's FOG_C and FOG_D slots.
+ *
+ * All values are read from ctx->Fog; param itself is not read.  The fog
+ * atom is only marked dirty when C or D actually changed.
+ */
+static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Float/int views of the same 32 bits, so the constants can be
+    * computed as floats and compared/stored as command words.
+    */
+   union { int i; float f; } c, d;
+   GLchan col[4];
+   GLuint i;
+
+   /* Start from the current values so untouched constants compare equal
+    * in the final check.
+    */
+   c.i = rmesa->hw.fog.cmd[FOG_C];
+   d.i = rmesa->hw.fog.cmd[FOG_D];
+
+   switch (pname) {
+   case GL_FOG_MODE:
+      /* Nothing is programmed while fog is disabled. */
+      if (!ctx->Fog.Enabled)
+        return;
+      R200_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_LINEAR;
+        /* Avoid dividing by zero when start == end. */
+        if (ctx->Fog.Start == ctx->Fog.End) {
+           c.f = 1.0F;
+           d.f = 1.0F;
+        }
+        else {
+           /* c + d*z == (end - z) / (end - start); presumably the
+            * hardware evaluates the factor in that form -- confirm.
+            */
+           c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+           d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
+        }
+        break;
+      case GL_EXP:
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP;
+        c.f = 0.0;
+        d.f = -ctx->Fog.Density;
+        break;
+      case GL_EXP2:
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP2;
+        c.f = 0.0;
+        d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+        break;
+      default:
+        return;
+      }
+      break;
+   case GL_FOG_DENSITY:
+      /* Density only matters for the exponential modes. */
+      switch (ctx->Fog.Mode) {
+      case GL_EXP:
+        c.f = 0.0;
+        d.f = -ctx->Fog.Density;
+        break;
+      case GL_EXP2:
+        c.f = 0.0;
+        d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+        break;
+      default:
+        break;
+      }
+      break;
+   case GL_FOG_START:
+   case GL_FOG_END:
+      /* Start/end only matter for linear fog; same formulas as above. */
+      if (ctx->Fog.Mode == GL_LINEAR) {
+        if (ctx->Fog.Start == ctx->Fog.End) {
+           c.f = 1.0F;
+           d.f = 1.0F;
+        } else {
+           c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+           d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
+        }
+      }
+      break;
+   case GL_FOG_COLOR: 
+      R200_STATECHANGE( rmesa, ctx );
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+      /* Packed with an alpha of 0; only the bits under
+       * R200_FOG_COLOR_MASK are cleared before the OR.
+       */
+      i = r200PackColor( 4, col[0], col[1], col[2], 0 );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
+      break;
+   case GL_FOG_COORDINATE_SOURCE_EXT: 
+      /* What to do?
+       */
+      break;
+   default:
+      return;
+   }
+
+   /* Only touch (and dirty) the fog atom when a constant changed. */
+   if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
+      R200_STATECHANGE( rmesa, fog );
+      rmesa->hw.fog.cmd[FOG_C] = c.i;
+      rmesa->hw.fog.cmd[FOG_D] = d.i;
+   }
+}
+
+
+/* =============================================================
+ * Scissoring
+ */
+
+
+/* Compute the intersection of rectangles a and b into *out.
+ * Returns GL_TRUE when the result has positive width and height,
+ * GL_FALSE otherwise.  *out is written in either case.
+ */
+static GLboolean intersect_rect( XF86DRIClipRectPtr out,
+                                XF86DRIClipRectPtr a,
+                                XF86DRIClipRectPtr b )
+{
+   *out = *a;
+
+   /* Pull each edge of the copy inwards to b's edge where b is tighter. */
+   out->x1 = ( b->x1 > out->x1 ) ? b->x1 : out->x1;
+   out->y1 = ( b->y1 > out->y1 ) ? b->y1 : out->y1;
+   out->x2 = ( b->x2 < out->x2 ) ? b->x2 : out->x2;
+   out->y2 = ( b->y2 < out->y2 ) ? b->y2 : out->y2;
+
+   if ( out->x1 < out->x2 && out->y1 < out->y2 )
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+
+/* Rebuild rmesa->state.scissor.pClipRects / numClipRects as the
+ * intersection of every window cliprect (rmesa->pClipRects) with the
+ * current scissor rectangle (rmesa->state.scissor.rect).
+ *
+ * The output array is grown on demand.  If that allocation fails the
+ * scissored list is left empty (count 0, no storage), so no caller can
+ * walk a stale count against a NULL array.
+ */
+void r200RecalcScissorRects( r200ContextPtr rmesa )
+{
+   XF86DRIClipRectPtr out;
+   int i;
+
+   /* Invalidate the old result first: every exit path below, including
+    * the allocation-failure one, must leave count and storage in sync.
+    */
+   rmesa->state.scissor.numClipRects = 0;
+
+   /* Grow cliprect store?
+    */
+   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+         rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */
+         rmesa->state.scissor.numAllocedClipRects *= 2;
+      }
+
+      /* Old contents are not needed; the list is rebuilt from scratch. */
+      if (rmesa->state.scissor.pClipRects)
+         FREE(rmesa->state.scissor.pClipRects);
+
+      rmesa->state.scissor.pClipRects =
+         MALLOC( rmesa->state.scissor.numAllocedClipRects *
+                 sizeof(XF86DRIClipRectRec) );
+
+      if ( rmesa->state.scissor.pClipRects == NULL ) {
+         rmesa->state.scissor.numAllocedClipRects = 0;
+         return;
+      }
+   }
+
+   out = rmesa->state.scissor.pClipRects;
+
+   /* intersect_rect() writes into *out unconditionally; the slot is only
+    * kept (out advanced) when the intersection is non-empty.
+    */
+   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
+      if ( intersect_rect( out,
+                           &rmesa->pClipRects[i],
+                           &rmesa->state.scissor.rect ) ) {
+         rmesa->state.scissor.numClipRects++;
+         out++;
+      }
+   }
+}
+
+
+/* Convert the GL scissor box into a screen-space rectangle and
+ * recompute the scissored cliprects.  The Y coordinates are flipped
+ * against the drawable height and the drawable's x/y position is added.
+ * Does nothing when there is no drawable.
+ */
+static void r200UpdateScissor( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   int left, top, right, bottom;
+
+   if ( !dPriv )
+      return;
+
+   /* Inclusive window-relative edges. */
+   left   = ctx->Scissor.X;
+   top    = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
+   right  = ctx->Scissor.X + ctx->Scissor.Width - 1;
+   bottom = dPriv->h - ctx->Scissor.Y - 1;
+
+   /* x2/y2 are stored one past the last covered pixel. */
+   rmesa->state.scissor.rect.x1 = left   + dPriv->x;
+   rmesa->state.scissor.rect.y1 = top    + dPriv->y;
+   rmesa->state.scissor.rect.x2 = right  + dPriv->x + 1;
+   rmesa->state.scissor.rect.y2 = bottom + dPriv->y + 1;
+
+   r200RecalcScissorRects( rmesa );
+}
+
+
+/* glScissor callback.  The box is re-read from ctx->Scissor by
+ * r200UpdateScissor(), so x, y, w and h are not used here.  Nothing is
+ * done while the scissor test is disabled.
+ */
+static void r200Scissor( GLcontext *ctx,
+                          GLint x, GLint y, GLsizei w, GLsizei h )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if ( !ctx->Scissor.Enabled )
+      return;
+
+   R200_FIREVERTICES( rmesa );      /* don't pipeline cliprect changes */
+   r200UpdateScissor( ctx );
+}
+
+
+/* =============================================================
+ * Culling
+ */
+
+/* Recompute the face-culling bits in both SE_CNTL and the TCL control
+ * word from ctx->Polygon.  Each atom is only marked dirty when its value
+ * actually changes.
+ */
+static void r200CullFace( GLcontext *ctx, GLenum unused )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Start from "both faces solid, nothing culled". */
+   GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL] |
+      R200_FFACE_SOLID | R200_BFACE_SOLID;
+   GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &
+      ~(R200_CULL_FRONT | R200_CULL_BACK);
+
+   if ( ctx->Polygon.CullFlag ) {
+      const GLenum which = ctx->Polygon.CullFaceMode;
+
+      if ( which == GL_FRONT || which == GL_FRONT_AND_BACK ) {
+         s &= ~R200_FFACE_SOLID;
+         t |= R200_CULL_FRONT;
+      }
+      if ( which == GL_BACK || which == GL_FRONT_AND_BACK ) {
+         s &= ~R200_BFACE_SOLID;
+         t |= R200_CULL_BACK;
+      }
+   }
+
+   if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
+      R200_STATECHANGE(rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = s;
+   }
+
+   if ( rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] != t ) {
+      R200_STATECHANGE(rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t;
+   }
+}
+
+/* Program the front-face winding in both SE_CNTL and the TCL control
+ * word.  Any mode other than GL_CW / GL_CCW leaves both direction
+ * fields cleared.
+ */
+static void r200FrontFace( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   R200_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_FFACE_CULL_DIR_MASK;
+
+   R200_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_CULL_FRONT_IS_CCW;
+
+   if ( mode == GL_CW ) {
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CW;
+   }
+   else if ( mode == GL_CCW ) {
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CCW;
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_CULL_FRONT_IS_CCW;
+   }
+}
+
+/* =============================================================
+ * Point state
+ */
+/* Point size callback.  Currently a stub: no hardware state is touched,
+ * and the trace print is compiled out by the constant-false condition.
+ */
+static void r200PointSize( GLcontext *ctx, GLfloat size )
+{
+   if (0) fprintf(stderr, "%s: %f\n", __FUNCTION__, size );
+}
+
+/* =============================================================
+ * Line state
+ */
+/* Program the line width and the wide-line enable bit.  The stored
+ * width comes from ctx->Line._Width; the wide-line decision is made on
+ * the widthf argument.
+ */
+static void r200LineWidth( GLcontext *ctx, GLfloat widthf )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Line width is stored in U6.4 format, hence the scale by 16. */
+   const GLuint width_bits = (GLuint)(ctx->Line._Width * 16.0);
+
+   R200_STATECHANGE( rmesa, lin );
+   R200_STATECHANGE( rmesa, set );
+
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] =
+      (rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] & ~0xffff) | width_bits;
+
+   if ( widthf > 1.0 )
+      rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_WIDELINE_ENABLE;
+   else
+      rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_WIDELINE_ENABLE;
+}
+
+/* Program the line stipple register: low 8 bits of the repeat factor
+ * in bits 16-23, 16-bit pattern in the low half.
+ */
+static void r200LineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint repeat_bits  = ((GLuint)factor & 0xff) << 16;
+   const GLuint pattern_bits = (GLuint)pattern;
+
+   R200_STATECHANGE( rmesa, lin );
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = (repeat_bits | pattern_bits);
+}
+
+
+/* =============================================================
+ * Masks
+ */
+/* Program the color plane mask.  The mask value is packed from
+ * ctx->Color.ColorMask; plane masking is enabled whenever at least one
+ * of the r/g/b/a arguments is false.  Each atom is only marked dirty
+ * when its value changes.
+ */
+static void r200ColorMask( GLcontext *ctx,
+                          GLboolean r, GLboolean g,
+                          GLboolean b, GLboolean a )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint planemask = r200PackColor( rmesa->r200Screen->cpp,
+                                           ctx->Color.ColorMask[RCOMP],
+                                           ctx->Color.ColorMask[GCOMP],
+                                           ctx->Color.ColorMask[BCOMP],
+                                           ctx->Color.ColorMask[ACOMP] );
+   GLuint cntl = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] & ~R200_PLANE_MASK_ENABLE;
+
+   if ( !r || !g || !b || !a )
+      cntl |= R200_PLANE_MASK_ENABLE;
+
+   if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != cntl ) {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
+   }
+
+   if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != planemask ) {
+      R200_STATECHANGE( rmesa, msk );
+      rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = planemask;
+   }
+}
+
+
+/* =============================================================
+ * Polygon state
+ */
+
+/* Program the polygon offset (Z bias) factor and constant.  The
+ * constant is units scaled by rmesa->state.depth.scale.  Both values
+ * are written to the command buffer as the raw bit patterns of their
+ * float representation.
+ */
+static void r200PolygonOffset( GLcontext *ctx,
+                              GLfloat factor, GLfloat units )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Reinterpret float bits through a union (as r200Fogfv does) instead
+    * of casting a GLfloat pointer to GLuint *, which violates the C
+    * aliasing rules and may be miscompiled under strict aliasing.
+    */
+   union { GLfloat f; GLuint ui32; } zbias_factor, zbias_constant;
+
+   zbias_factor.f   = factor;
+   zbias_constant.f = units * rmesa->state.depth.scale;
+
+/*    factor *= 2; */
+/*    constant *= 2; */
+
+/*    fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
+
+   R200_STATECHANGE( rmesa, zbs );
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = zbias_factor.ui32;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = zbias_constant.ui32;
+}
+
+/* Upload a new 32x32 polygon stipple pattern.  The 32 rows are copied
+ * into rmesa->state.stipple.mask in reverse order and then handed to
+ * the kernel through the DRM_RADEON_STIPPLE command while the hardware
+ * lock is held.
+ */
+static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint *rows = (const GLuint *) mask;
+   drmRadeonStipple stipple;
+   GLuint row;
+
+   /* Must flip pattern upside down.
+    */
+   for ( row = 0 ; row < 32 ; row++ )
+      rmesa->state.stipple.mask[31 - row] = rows[row];
+
+   /* FIXME: Use window x,y offsets into stipple RAM.
+    */
+   stipple.mask = rmesa->state.stipple.mask;
+
+   /* TODO: push this into cmd mechanism
+    */
+   R200_FIREVERTICES( rmesa );
+   LOCK_HARDWARE( rmesa );
+   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE,
+                    &stipple, sizeof(drmRadeonStipple) );
+   UNLOCK_HARDWARE( rmesa );
+}
+
+/* Polygon mode callback.  Raises or clears the "unfilled" TCL fallback
+ * from the DD_TRI_UNFILLED bit of ctx->_TriangleCaps (face and mode are
+ * not read), then re-chooses render and vertex state when any TCL
+ * fallback is active.
+ */
+static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLboolean unfilled =
+      (ctx->_TriangleCaps & DD_TRI_UNFILLED) ? GL_TRUE : GL_FALSE;
+
+   /* Can't generally do unfilled via tcl, but some good special
+    * cases work.
+    */
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, unfilled);
+
+   if (!rmesa->TclFallback)
+      return;
+
+   r200ChooseRenderState( ctx );
+   r200ChooseVertexState( ctx );
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+/* Examine lighting and texture state to determine if separate specular
+ * should be enabled.
+ */
+/* Recompute the vertex output format, TCL lighting control and the
+ * PP_CNTL specular enable from the current lighting, color-sum and fog
+ * state.  The tcl and vtx atoms are always marked dirty; the ctx atom
+ * only when PP_CNTL changes.
+ */
+static void r200UpdateSpecular( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   CARD32 p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+
+   R200_STATECHANGE( rmesa, tcl );
+   R200_STATECHANGE( rmesa, vtx );
+
+   /* Reset to a known baseline: no color outputs, lighting off,
+    * specular off, diffuse+specular combined.  The branches below
+    * switch back on whatever the current GL state needs.
+    */
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_0_SHIFT);
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_1_SHIFT);
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_0;
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_1;
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LIGHTING_ENABLE;
+
+   p &= ~R200_SPECULAR_ENABLE;
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_DIFFUSE_SPECULAR_COMBINE;
+
+
+   /* Lighting with separate specular: output both colors, enable
+    * lighting and the specular add, and stop combining in TCL.
+    */
+   if (ctx->Light.Enabled &&
+       ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
+         (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));        
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
+      p |=  R200_SPECULAR_ENABLE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= 
+        ~R200_DIFFUSE_SPECULAR_COMBINE;
+   }
+   /* Lighting with a single (combined) color output. */
+   else if (ctx->Light.Enabled) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));        
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
+   /* No lighting, but color sum: both color formats set and the
+    * specular add enabled (COMPSEL is left cleared in this branch).
+    */
+   } else if (ctx->Fog.ColorSumEnabled ) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
+         (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));        
+      p |=  R200_SPECULAR_ENABLE;
+   /* Neither: only the color 0 format is set. */
+   } else {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));        
+   }
+
+   /* Fog additionally needs color 1 output (presumably the fog factor
+    * travels in that color -- confirm against the hardware docs).
+    */
+   if (ctx->Fog.Enabled) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));        
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
+   }
+
+   if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
+   }
+
+   /* Update vertex/render formats
+    */
+   if (rmesa->TclFallback) { 
+      r200ChooseRenderState( ctx );
+      r200ChooseVertexState( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Materials
+ */
+
+
+/* Update on colormaterial, material emissive/ambient,
+ * lightmodel.globalambient
+ *
+ * Writes three floats starting at GLT_RED in the glt atom: either the
+ * front emission plus (presumably, going by the ACC_SCALE_3V name)
+ * model ambient times front material ambient, or the model ambient
+ * alone.
+ */
+static void update_global_ambient( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* NOTE(review): R200_DB_STATE is defined outside this chunk; it is
+    * paired with R200_DB_STATECHANGE below -- see its definition for
+    * the exact contract.
+    */
+   float *fcmd = (float *)R200_DB_STATE( glt );
+
+   /* Need to do more if both emissive & ambient are PREMULT:
+    * (both front source fields in LIGHT_MODEL_CTL_1 are zero)
+    */
+   if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] &
+       ((3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+       (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0) 
+   {
+      COPY_3V( &fcmd[GLT_RED], 
+              ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
+      ACC_SCALE_3V( &fcmd[GLT_RED],
+                  ctx->Light.Model.Ambient,
+                  ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
+   } 
+   else
+   {
+      COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
+   }
+   
+   R200_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
+}
+
+/* Update on change to 
+ *    - light[p].colors
+ *    - light[p].enabled
+ *    - material,
+ *    - colormaterial enabled
+ *    - colormaterial bitmask
+ */
+/* Refresh the ambient/diffuse/specular colors stored in the lit[p]
+ * atom for light p.  Each color is scaled by the matching front
+ * material attribute unless color material is enabled and tracks that
+ * attribute.  Does nothing for a disabled light.
+ */
+static void update_light_colors( GLcontext *ctx, GLuint p )
+{
+   struct gl_light *l = &ctx->Light.Light[p];
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
+   GLuint tracked;
+   float *fcmd;
+
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
+   if (!l->Enabled)
+      return;
+
+   fcmd = (float *)R200_DB_STATE( lit[p] );
+
+   /* Attributes tracked by color material; none when it is disabled. */
+   tracked = ctx->Light.ColorMaterialEnabled
+      ? ctx->Light.ColorMaterialBitmask : 0;
+
+   COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
+   if (!(tracked & MAT_BIT_FRONT_AMBIENT))
+      SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], mat[MAT_ATTRIB_FRONT_AMBIENT] );
+
+   COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
+   if (!(tracked & MAT_BIT_FRONT_DIFFUSE))
+      SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], mat[MAT_ATTRIB_FRONT_DIFFUSE] );
+
+   COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
+   if (!(tracked & MAT_BIT_FRONT_SPECULAR))
+      SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], mat[MAT_ATTRIB_FRONT_SPECULAR] );
+
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+}
+
+/* Also fallback for asym colormaterial mode in twoside lighting...
+ */
+/* Decide whether two-sided lighting needs the TCL fallback.  With
+ * lighting and two-side enabled, the fallback is raised when color
+ * material tracks different attributes for front and back, or
+ * otherwise when any front material attribute differs from its back
+ * counterpart.
+ */
+static void check_twoside_fallback( GLcontext *ctx )
+{
+   GLboolean fallback = GL_FALSE;
+   GLint i;
+
+   if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+      const GLuint cm_bits = ctx->Light.ColorMaterialBitmask;
+
+      if (ctx->Light.ColorMaterialEnabled &&
+          (cm_bits & BACK_MATERIAL_BITS) !=
+          ((cm_bits & FRONT_MATERIAL_BITS) << 1)) {
+         fallback = GL_TRUE;
+      }
+      else {
+         /* Attributes are laid out as front/back pairs: i is the front
+          * entry, i+1 the matching back entry.
+          */
+         i = MAT_ATTRIB_FRONT_AMBIENT;
+         while (i < MAT_ATTRIB_FRONT_INDEXES && !fallback) {
+            if (memcmp( ctx->Light.Material.Attrib[i],
+                        ctx->Light.Material.Attrib[i+1],
+                        sizeof(GLfloat)*4) != 0)
+               fallback = GL_TRUE;
+            i += 2;
+         }
+      }
+   }
+
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_LIGHT_TWOSIDE, fallback );
+}
+
+/* Program the front material source selects in TCL_LIGHT_MODEL_CTL_1
+ * from the current color material bitmask.  The face and mode
+ * arguments are not consulted; the values are read from ctx.
+ *
+ * When the register value changes, all light colours and the global
+ * ambient are recomputed.  The two-side fallback is re-checked on
+ * every call.
+ */
+static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   if (ctx->Light.ColorMaterialEnabled) {
+      r200ContextPtr rmesa = R200_CONTEXT(ctx);
+      const GLuint tracked = ctx->Light.ColorMaterialBitmask;
+      GLuint lm1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1];
+
+      /* Default to PREMULT: clear all four front source fields first.
+       */
+      lm1 &= ~(0xf << R200_FRONT_EMISSIVE_SOURCE_SHIFT);
+      lm1 &= ~(0xf << R200_FRONT_AMBIENT_SOURCE_SHIFT);
+      lm1 &= ~(0xf << R200_FRONT_DIFFUSE_SOURCE_SHIFT);
+      lm1 &= ~(0xf << R200_FRONT_SPECULAR_SOURCE_SHIFT);
+
+      /* Tracked attributes take their value from vertex colour 0. */
+      if (tracked & MAT_BIT_FRONT_EMISSION)
+         lm1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                 R200_FRONT_EMISSIVE_SOURCE_SHIFT);
+
+      if (tracked & MAT_BIT_FRONT_AMBIENT)
+         lm1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                 R200_FRONT_AMBIENT_SOURCE_SHIFT);
+
+      if (tracked & MAT_BIT_FRONT_DIFFUSE)
+         lm1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                 R200_FRONT_DIFFUSE_SOURCE_SHIFT);
+
+      if (tracked & MAT_BIT_FRONT_SPECULAR)
+         lm1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                 R200_FRONT_SPECULAR_SOURCE_SHIFT);
+
+      if (lm1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1]) {
+         GLuint light;
+
+         R200_STATECHANGE( rmesa, tcl );
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = lm1;
+
+         for (light = 0 ; light < MAX_LIGHTS ; light++)
+            update_light_colors( ctx, light );
+         update_global_ambient( ctx );
+      }
+   }
+
+   check_twoside_fallback( ctx );
+}
+
+/* Copy the front material attributes into the mtl[0] atom, skipping
+ * any attribute that color material currently tracks.
+ *
+ * If R200_DB_STATECHANGE reports a change, the per-light colours, the
+ * two-side fallback and the global ambient are refreshed; otherwise a
+ * debug message is printed when DEBUG_PRIMS or DEBUG_STATE is set.
+ */
+void r200UpdateMaterial( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat (*attrib)[4] = ctx->Light.Material.Attrib;
+   GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] );
+   GLuint light;
+   /* Bits still set here are the attributes we must upload ourselves. */
+   const GLuint upload = ctx->Light.ColorMaterialEnabled
+      ? ~ctx->Light.ColorMaterialBitmask : ~0;
+
+   if (R200_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (upload & MAT_BIT_FRONT_EMISSION) {
+      const GLfloat *src = attrib[MAT_ATTRIB_FRONT_EMISSION];
+      fcmd[MTL_EMMISSIVE_RED]   = src[0];
+      fcmd[MTL_EMMISSIVE_GREEN] = src[1];
+      fcmd[MTL_EMMISSIVE_BLUE]  = src[2];
+      fcmd[MTL_EMMISSIVE_ALPHA] = src[3];
+   }
+   if (upload & MAT_BIT_FRONT_AMBIENT) {
+      const GLfloat *src = attrib[MAT_ATTRIB_FRONT_AMBIENT];
+      fcmd[MTL_AMBIENT_RED]     = src[0];
+      fcmd[MTL_AMBIENT_GREEN]   = src[1];
+      fcmd[MTL_AMBIENT_BLUE]    = src[2];
+      fcmd[MTL_AMBIENT_ALPHA]   = src[3];
+   }
+   if (upload & MAT_BIT_FRONT_DIFFUSE) {
+      const GLfloat *src = attrib[MAT_ATTRIB_FRONT_DIFFUSE];
+      fcmd[MTL_DIFFUSE_RED]     = src[0];
+      fcmd[MTL_DIFFUSE_GREEN]   = src[1];
+      fcmd[MTL_DIFFUSE_BLUE]    = src[2];
+      fcmd[MTL_DIFFUSE_ALPHA]   = src[3];
+   }
+   if (upload & MAT_BIT_FRONT_SPECULAR) {
+      const GLfloat *src = attrib[MAT_ATTRIB_FRONT_SPECULAR];
+      fcmd[MTL_SPECULAR_RED]    = src[0];
+      fcmd[MTL_SPECULAR_GREEN]  = src[1];
+      fcmd[MTL_SPECULAR_BLUE]   = src[2];
+      fcmd[MTL_SPECULAR_ALPHA]  = src[3];
+   }
+   if (upload & MAT_BIT_FRONT_SHININESS) {
+      fcmd[MTL_SHININESS]       = attrib[MAT_ATTRIB_FRONT_SHININESS][0];
+   }
+
+   if (!R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[0] )) {
+      if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_STATE))
+         fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
+      return;
+   }
+
+   for (light = 0 ; light < MAX_LIGHTS ; light++)
+      update_light_colors( ctx, light );
+
+   check_twoside_fallback( ctx );
+   update_global_ambient( ctx );
+}
+
+/* Depends on:
+ *    _NEW_LIGHT
+ *    _NEW_MODELVIEW
+ *    _MESA_NEW_NEED_EYE_COORDS
+ *
+ * Uses derived state from mesa:
+ *       _VP_inf_norm
+ *       _h_inf_norm
+ *       _Position
+ *       _NormDirection
+ *       _ModelViewInvScale
+ *       _NeedEyeCoords
+ *       _EyeZDir
+ *
+ * which are calculated in light.c and are correct for the current
+ * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
+ * and _MESA_NEW_NEED_EYE_COORDS.
+ *
+ * Three pieces of state are refreshed: the LIGHT_IN_MODELSPACE bit,
+ * the eye atom, and (when lighting is on) the position/direction of
+ * every enabled light.
+ */
+static void update_light( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint old_ctl0 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0];
+   GLuint new_ctl0;
+   GLfloat *eyecmd;
+   GLint i;
+
+   /* Have to check these, or have an automatic shortcircuit mechanism
+    * to remove noop statechanges. (Or just do a better job on the
+    * front end).
+    */
+   new_ctl0 = ctx->_NeedEyeCoords
+      ? (old_ctl0 & ~R200_LIGHT_IN_MODELSPACE)
+      : (old_ctl0 | R200_LIGHT_IN_MODELSPACE);
+
+   if (new_ctl0 != old_ctl0) {
+      R200_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = new_ctl0;
+   }
+
+   /* Eye direction (Z negated) and normal rescale factor. */
+   eyecmd = (GLfloat *)R200_DB_STATE( eye );
+   eyecmd[EYE_X] = ctx->_EyeZDir[0];
+   eyecmd[EYE_Y] = ctx->_EyeZDir[1];
+   eyecmd[EYE_Z] = - ctx->_EyeZDir[2];
+   eyecmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
+
+   if (!ctx->Light.Enabled)
+      return;
+
+   for (i = 0 ; i < MAX_LIGHTS ; i++) {
+      struct gl_light *light = &ctx->Light.Light[i];
+      GLfloat *fcmd;
+
+      if (!light->Enabled)
+         continue;
+
+      fcmd = (GLfloat *)R200_DB_STATE( lit[i] );
+
+      if (light->EyePosition[3] == 0.0) {
+         /* w == 0: use the _VP_inf_norm / _h_inf_norm vectors. */
+         COPY_3FV( &fcmd[LIT_POSITION_X], light->_VP_inf_norm );
+         COPY_3FV( &fcmd[LIT_DIRECTION_X], light->_h_inf_norm );
+         fcmd[LIT_POSITION_W] = 0;
+         fcmd[LIT_DIRECTION_W] = 0;
+      }
+      else {
+         /* w != 0: full position, negated direction. */
+         COPY_4V( &fcmd[LIT_POSITION_X], light->_Position );
+         fcmd[LIT_DIRECTION_X] = -light->_NormDirection[0];
+         fcmd[LIT_DIRECTION_Y] = -light->_NormDirection[1];
+         fcmd[LIT_DIRECTION_Z] = -light->_NormDirection[2];
+         fcmd[LIT_DIRECTION_W] = 0;
+      }
+
+      R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[i] );
+   }
+}
+
+/* Driver hook for per-light parameter changes.
+ *
+ *   light  - GL_LIGHT0 + n
+ *   pname  - which parameter changed
+ *   params - new value(s); only params[0] is read here, and only for
+ *            the spot exponent and attenuation factors.  Everything
+ *            else is taken from ctx.
+ *
+ * Unrecognised pnames are ignored.
+ */
+static void r200Lightfv( GLcontext *ctx, GLenum light,
+                          GLenum pname, const GLfloat *params )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint p = light - GL_LIGHT0;
+   struct gl_light *src = &ctx->Light.Light[p];
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+   /* Two lights share each PER_LIGHT_CTL word; odd lights use the
+    * LIGHT_1 flags, even lights the LIGHT_0 flags.
+    */
+   const GLuint ctl_idx = TCL_PER_LIGHT_CTL_0 + p/2;
+
+   if (pname == GL_AMBIENT ||
+       pname == GL_DIFFUSE ||
+       pname == GL_SPECULAR) {
+      update_light_colors( ctx, p );
+   }
+   else if (pname == GL_SPOT_DIRECTION) {
+      /* picked up in update_light */
+   }
+   else if (pname == GL_POSITION) {
+      /* positions picked up in update_light, but can do flag here */
+      const GLuint local_flag =
+         (p&1) ? R200_LIGHT_1_IS_LOCAL : R200_LIGHT_0_IS_LOCAL;
+
+      R200_STATECHANGE(rmesa, tcl);
+      if (src->EyePosition[3] != 0.0F)
+         rmesa->hw.tcl.cmd[ctl_idx] |= local_flag;
+      else
+         rmesa->hw.tcl.cmd[ctl_idx] &= ~local_flag;
+   }
+   else if (pname == GL_SPOT_EXPONENT) {
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_EXPONENT] = params[0];
+   }
+   else if (pname == GL_SPOT_CUTOFF) {
+      const GLuint spot_flag =
+         (p&1) ? R200_LIGHT_1_IS_SPOT : R200_LIGHT_0_IS_SPOT;
+
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_CUTOFF] = src->_CosCutoff;
+
+      /* A cutoff of exactly 180 clears the spot flag. */
+      R200_STATECHANGE(rmesa, tcl);
+      if (src->SpotCutoff != 180.0F)
+         rmesa->hw.tcl.cmd[ctl_idx] |= spot_flag;
+      else
+         rmesa->hw.tcl.cmd[ctl_idx] &= ~spot_flag;
+   }
+   else if (pname == GL_CONSTANT_ATTENUATION) {
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_CONST] = params[0];
+   }
+   else if (pname == GL_LINEAR_ATTENUATION) {
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_LINEAR] = params[0];
+   }
+   else if (pname == GL_QUADRATIC_ATTENUATION) {
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_QUADRATIC] = params[0];
+   }
+}
+
+                 
+
+
+/* Driver hook for light model changes.  The param argument is not
+ * read; the new values are taken from ctx->Light.Model.
+ */
+static void r200LightModelfv( GLcontext *ctx, GLenum pname,
+                               const GLfloat *param )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint ctl0;
+
+   switch (pname) {
+   case GL_LIGHT_MODEL_AMBIENT:
+      update_global_ambient( ctx );
+      break;
+
+   case GL_LIGHT_MODEL_LOCAL_VIEWER:
+      R200_STATECHANGE( rmesa, tcl );
+      ctl0 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] & ~R200_LOCAL_VIEWER;
+      if (ctx->Light.Model.LocalViewer)
+         ctl0 |= R200_LOCAL_VIEWER;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = ctl0;
+      break;
+
+   case GL_LIGHT_MODEL_TWO_SIDE:
+      R200_STATECHANGE( rmesa, tcl );
+      ctl0 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] & ~R200_LIGHT_TWOSIDE;
+      if (ctx->Light.Model.TwoSide)
+         ctl0 |= R200_LIGHT_TWOSIDE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = ctl0;
+
+      check_twoside_fallback( ctx );
+
+      /* Re-pick render and vertex state while a TCL fallback is set. */
+      if (rmesa->TclFallback) {
+         r200ChooseRenderState( ctx );
+         r200ChooseVertexState( ctx );
+      }
+      break;
+
+   case GL_LIGHT_MODEL_COLOR_CONTROL:
+      r200UpdateSpecular(ctx);
+      break;
+
+   default:
+      break;
+   }
+}
+
+/* Select flat or Gouraud shading for the diffuse, alpha, specular and
+ * fog fields of SE_CNTL.  Modes other than GL_FLAT and GL_SMOOTH are
+ * ignored, and the atom is only marked changed when the register
+ * value actually differs.
+ */
+static void r200ShadeModel( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint shade_fields = (R200_DIFFUSE_SHADE_MASK |
+                                R200_ALPHA_SHADE_MASK |
+                                R200_SPECULAR_SHADE_MASK |
+                                R200_FOG_SHADE_MASK);
+   GLuint shade_bits;
+   GLuint se_cntl;
+
+   if ( mode == GL_FLAT ) {
+      shade_bits = (R200_DIFFUSE_SHADE_FLAT |
+                    R200_ALPHA_SHADE_FLAT |
+                    R200_SPECULAR_SHADE_FLAT |
+                    R200_FOG_SHADE_FLAT);
+   }
+   else if ( mode == GL_SMOOTH ) {
+      shade_bits = (R200_DIFFUSE_SHADE_GOURAUD |
+                    R200_ALPHA_SHADE_GOURAUD |
+                    R200_SPECULAR_SHADE_GOURAUD |
+                    R200_FOG_SHADE_GOURAUD);
+   }
+   else {
+      return;
+   }
+
+   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   se_cntl = (se_cntl & ~shade_fields) | shade_bits;
+
+   if ( rmesa->hw.set.cmd[SET_SE_CNTL] != se_cntl ) {
+      R200_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+
+/* =============================================================
+ * User clip planes
+ */
+
+/* Upload one user clip plane into its ucp atom.  The eq argument is
+ * not read: the four words come from ctx->Transform._ClipUserPlane
+ * and are copied through a GLint pointer, so their bit patterns are
+ * stored unchanged.
+ */
+static void r200ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+   const GLint *raw = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+   R200_STATECHANGE( rmesa, ucp[p] );
+   rmesa->hw.ucp[p].cmd[UCP_X] = raw[0];
+   rmesa->hw.ucp[p].cmd[UCP_Y] = raw[1];
+   rmesa->hw.ucp[p].cmd[UCP_Z] = raw[2];
+   rmesa->hw.ucp[p].cmd[UCP_W] = raw[3];
+}
+
+/* Re-upload every enabled user clip plane from
+ * ctx->Transform._ClipUserPlane.  Disabled planes are left alone.
+ */
+static void r200UpdateClipPlanes( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint p;
+
+   for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+      const GLint *raw;
+
+      if (!(ctx->Transform.ClipPlanesEnabled & (1 << p)))
+         continue;
+
+      /* Copied through a GLint pointer, so the bits are unchanged. */
+      raw = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+      R200_STATECHANGE( rmesa, ucp[p] );
+      rmesa->hw.ucp[p].cmd[UCP_X] = raw[0];
+      rmesa->hw.ucp[p].cmd[UCP_Y] = raw[1];
+      rmesa->hw.ucp[p].cmd[UCP_Z] = raw[2];
+      rmesa->hw.ucp[p].cmd[UCP_W] = raw[3];
+   }
+}
+
+
+/* =============================================================
+ * Stencil
+ */
+
+/* Program the stencil test function, reference value and value mask.
+ * The func/ref/mask arguments are not read; the values come from
+ * ctx->Stencil (index 0).  An unrecognised function leaves the test
+ * field cleared.
+ */
+static void r200StencilFunc( GLcontext *ctx, GLenum func,
+                              GLint ref, GLuint mask )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint refmask =
+      ((ctx->Stencil.Ref[0] << R200_STENCIL_REF_SHIFT) |
+       (ctx->Stencil.ValueMask[0] << R200_STENCIL_MASK_SHIFT));
+   GLuint test_bits = 0;
+   GLuint zstencil;
+   GLuint ref_reg;
+
+   R200_STATECHANGE( rmesa, ctx );
+   R200_STATECHANGE( rmesa, msk );
+
+   switch ( ctx->Stencil.Function[0] ) {
+   case GL_NEVER:    test_bits = R200_STENCIL_TEST_NEVER;   break;
+   case GL_LESS:     test_bits = R200_STENCIL_TEST_LESS;    break;
+   case GL_EQUAL:    test_bits = R200_STENCIL_TEST_EQUAL;   break;
+   case GL_LEQUAL:   test_bits = R200_STENCIL_TEST_LEQUAL;  break;
+   case GL_GREATER:  test_bits = R200_STENCIL_TEST_GREATER; break;
+   case GL_NOTEQUAL: test_bits = R200_STENCIL_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   test_bits = R200_STENCIL_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   test_bits = R200_STENCIL_TEST_ALWAYS;  break;
+   }
+
+   zstencil = rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL];
+   zstencil &= ~R200_STENCIL_TEST_MASK;
+   zstencil |= test_bits;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = zstencil;
+
+   ref_reg = rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK];
+   ref_reg &= ~(R200_STENCIL_REF_MASK | R200_STENCIL_VALUE_MASK);
+   ref_reg |= refmask;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = ref_reg;
+}
+
+/* Program the stencil write mask.  The mask argument is not read; the
+ * value comes from ctx->Stencil.WriteMask[0].
+ */
+static void r200StencilMask( GLcontext *ctx, GLuint mask )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint ref_reg;
+
+   R200_STATECHANGE( rmesa, msk );
+
+   ref_reg = rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK];
+   ref_reg &= ~R200_STENCIL_WRITE_MASK;
+   ref_reg |= (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT);
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = ref_reg;
+}
+
+/* Program the three stencil operations (stencil fail, depth fail,
+ * depth pass) into RB3D_ZSTENCILCNTL.  The fail/zfail/zpass arguments
+ * are not read; the values come from ctx->Stencil (index 0).  An
+ * unrecognised operation leaves its field cleared.
+ */
+static void r200StencilOp( GLcontext *ctx, GLenum fail,
+                            GLenum zfail, GLenum zpass )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint fail_bits = 0;
+   GLuint zfail_bits = 0;
+   GLuint zpass_bits = 0;
+   GLuint zstencil;
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   switch ( ctx->Stencil.FailFunc[0] ) {
+   case GL_KEEP:          fail_bits = R200_STENCIL_FAIL_KEEP;     break;
+   case GL_ZERO:          fail_bits = R200_STENCIL_FAIL_ZERO;     break;
+   case GL_REPLACE:       fail_bits = R200_STENCIL_FAIL_REPLACE;  break;
+   case GL_INCR:          fail_bits = R200_STENCIL_FAIL_INC;      break;
+   case GL_DECR:          fail_bits = R200_STENCIL_FAIL_DEC;      break;
+   case GL_INCR_WRAP_EXT: fail_bits = R200_STENCIL_FAIL_INC_WRAP; break;
+   case GL_DECR_WRAP_EXT: fail_bits = R200_STENCIL_FAIL_DEC_WRAP; break;
+   case GL_INVERT:        fail_bits = R200_STENCIL_FAIL_INVERT;   break;
+   }
+
+   switch ( ctx->Stencil.ZFailFunc[0] ) {
+   case GL_KEEP:          zfail_bits = R200_STENCIL_ZFAIL_KEEP;     break;
+   case GL_ZERO:          zfail_bits = R200_STENCIL_ZFAIL_ZERO;     break;
+   case GL_REPLACE:       zfail_bits = R200_STENCIL_ZFAIL_REPLACE;  break;
+   case GL_INCR:          zfail_bits = R200_STENCIL_ZFAIL_INC;      break;
+   case GL_DECR:          zfail_bits = R200_STENCIL_ZFAIL_DEC;      break;
+   case GL_INCR_WRAP_EXT: zfail_bits = R200_STENCIL_ZFAIL_INC_WRAP; break;
+   case GL_DECR_WRAP_EXT: zfail_bits = R200_STENCIL_ZFAIL_DEC_WRAP; break;
+   case GL_INVERT:        zfail_bits = R200_STENCIL_ZFAIL_INVERT;   break;
+   }
+
+   switch ( ctx->Stencil.ZPassFunc[0] ) {
+   case GL_KEEP:          zpass_bits = R200_STENCIL_ZPASS_KEEP;     break;
+   case GL_ZERO:          zpass_bits = R200_STENCIL_ZPASS_ZERO;     break;
+   case GL_REPLACE:       zpass_bits = R200_STENCIL_ZPASS_REPLACE;  break;
+   case GL_INCR:          zpass_bits = R200_STENCIL_ZPASS_INC;      break;
+   case GL_DECR:          zpass_bits = R200_STENCIL_ZPASS_DEC;      break;
+   case GL_INCR_WRAP_EXT: zpass_bits = R200_STENCIL_ZPASS_INC_WRAP; break;
+   case GL_DECR_WRAP_EXT: zpass_bits = R200_STENCIL_ZPASS_DEC_WRAP; break;
+   case GL_INVERT:        zpass_bits = R200_STENCIL_ZPASS_INVERT;   break;
+   }
+
+   /* Clear the three op fields, then merge in the new selections. */
+   zstencil = rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL];
+   zstencil &= ~(R200_STENCIL_FAIL_MASK |
+                 R200_STENCIL_ZFAIL_MASK |
+                 R200_STENCIL_ZPASS_MASK);
+   zstencil |= fail_bits;
+   zstencil |= zfail_bits;
+   zstencil |= zpass_bits;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = zstencil;
+}
+
+/* Cache the packed stencil clear word in rmesa->state.stencil.clear:
+ * the clear value, an all-ones (0xff) field at R200_STENCIL_MASK_SHIFT
+ * and the current write mask.  The s argument is not read; the clear
+ * value comes from ctx->Stencil.Clear.
+ */
+static void r200ClearStencil( GLcontext *ctx, GLint s )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint packed = (GLuint) ctx->Stencil.Clear;
+
+   packed |= (0xff << R200_STENCIL_MASK_SHIFT);
+   packed |= (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT);
+
+   rmesa->state.stencil.clear = packed;
+}
+
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+/*
+ * To correctly position primitives:
+ */
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
+
+/* Recompute the hardware viewport scale/offset registers from Mesa's
+ * window map matrix and the drawable's position.
+ *
+ * X is offset by the drawable's x; Y is negated and offset by the
+ * drawable's y + h.  Both get the SUBPIXEL bias.  Z scale and offset
+ * are multiplied by rmesa->state.depth.scale.  The floats are stored
+ * as their raw bit patterns.
+ */
+void r200UpdateWindow( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   const GLfloat *m = ctx->Viewport._WindowMap.m;
+   const GLfloat win_x = (GLfloat)dPriv->x;
+   const GLfloat win_y = (GLfloat)dPriv->y + dPriv->h;
+
+   GLfloat xscale  = m[MAT_SX];
+   GLfloat xoffset = m[MAT_TX] + win_x + SUBPIXEL_X;
+   GLfloat yscale  = - m[MAT_SY];
+   GLfloat yoffset = (- m[MAT_TY]) + win_y + SUBPIXEL_Y;
+   GLfloat zscale  = m[MAT_SZ] * rmesa->state.depth.scale;
+   GLfloat zoffset = m[MAT_TZ] * rmesa->state.depth.scale;
+
+   R200_FIREVERTICES( rmesa );
+   R200_STATECHANGE( rmesa, vpt );
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = *(GLuint *)&xscale;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&xoffset;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = *(GLuint *)&yscale;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&yoffset;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = *(GLuint *)&zscale;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = *(GLuint *)&zoffset;
+}
+
+
+
+/* Driver hook for viewport changes.  The x/y/width/height arguments
+ * are not read; r200UpdateWindow() takes everything from ctx.
+ */
+static void r200Viewport( GLcontext *ctx, GLint x, GLint y,
+                           GLsizei width, GLsizei height )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   /* Don't pipeline viewport changes, conflict with window offset
+    * setting below.  Could apply deltas to rescue pipelined viewport
+    * values, or keep the originals hanging around.
+    */
+   R200_FIREVERTICES( rmesa );
+   r200UpdateWindow( ctx );
+}
+
+/* Driver hook for depth range changes.  nearval and farval are not
+ * read here; r200UpdateWindow() rebuilds all viewport registers,
+ * including the Z scale/offset, from ctx->Viewport._WindowMap.
+ */
+static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
+                             GLclampd farval )
+{
+   r200UpdateWindow( ctx );
+}
+
+/* Re-derive the viewport X/Y offsets and the polygon stipple origin
+ * from the drawable's current x, y and h, then refresh the scissor.
+ *
+ * The offsets are computed exactly as in r200UpdateWindow(),
+ * including the SUBPIXEL_X/SUBPIXEL_Y bias.  Without the bias this
+ * function would disagree with the value r200UpdateWindow() wrote,
+ * dropping the subpixel correction and reporting a change on every
+ * call.
+ *
+ * The vpt atom is marked changed with R200_STATECHANGE before it is
+ * written, as every other writer of that atom does, so the new
+ * offsets are actually emitted to the hardware.
+ */
+void r200UpdateViewportOffset( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   GLfloat xoffset = (GLfloat)dPriv->x;
+   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+   /* Keep in sync with r200UpdateWindow(). */
+   GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   GLfloat ty = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+
+   /* Compare raw bit patterns, which is how the registers are stored. */
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != *(GLuint *)&tx ||
+       rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != *(GLuint *)&ty )
+   {
+      /* Note: this should also modify whatever data the context reset
+       * code uses...
+       */
+      R200_STATECHANGE( rmesa, vpt );
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = *(GLuint *)&tx;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = *(GLuint *)&ty;
+
+      /* update polygon stipple x/y screen offset */
+      {
+         GLuint stx, sty;
+         GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];
+
+         m &= ~(R200_STIPPLE_X_OFFSET_MASK |
+                R200_STIPPLE_Y_OFFSET_MASK);
+
+         /* add magic offsets, then invert */
+         stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
+         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
+                     & R200_STIPPLE_COORD_MASK);
+
+         m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
+               (sty << R200_STIPPLE_Y_OFFSET_SHIFT));
+
+         /* Only mark the msc atom changed when the value differs. */
+         if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
+            R200_STATECHANGE( rmesa, msc );
+            rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
+         }
+      }
+   }
+
+   r200UpdateScissor( ctx );
+}
+
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+/* Convert the float clear colour to bytes with
+ * CLAMPED_FLOAT_TO_UBYTE and cache it, packed via r200PackColor for
+ * the screen's cpp, in rmesa->state.color.clear.
+ */
+static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLubyte rgba[4];
+   GLint i;
+
+   for (i = 0; i < 4; i++) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[i], c[i]);
+   }
+
+   rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp,
+                                             rgba[0], rgba[1],
+                                             rgba[2], rgba[3] );
+}
+
+
+/* Raise the render-mode fallback for every mode except GL_RENDER,
+ * and clear it for GL_RENDER.
+ */
+static void r200RenderMode( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLboolean not_render = (mode != GL_RENDER);
+
+   FALLBACK( rmesa, R200_FALLBACK_RENDER_MODE, not_render );
+}
+
+
+/* Hardware ROP codes, indexed by (opcode - GL_CLEAR) in
+ * r200LogicOpCode().  The 16 entries must stay in the order of the GL
+ * logic-op enums; the comments name the GL opcode each entry is
+ * presumed to serve, going by the matching R200_ROP_* name.
+ */
+static GLuint r200_rop_tab[] = {
+   R200_ROP_CLEAR,           /* GL_CLEAR */
+   R200_ROP_AND,             /* GL_AND */
+   R200_ROP_AND_REVERSE,     /* GL_AND_REVERSE */
+   R200_ROP_COPY,            /* GL_COPY */
+   R200_ROP_AND_INVERTED,    /* GL_AND_INVERTED */
+   R200_ROP_NOOP,            /* GL_NOOP */
+   R200_ROP_XOR,             /* GL_XOR */
+   R200_ROP_OR,              /* GL_OR */
+   R200_ROP_NOR,             /* GL_NOR */
+   R200_ROP_EQUIV,           /* GL_EQUIV */
+   R200_ROP_INVERT,          /* GL_INVERT */
+   R200_ROP_OR_REVERSE,      /* GL_OR_REVERSE */
+   R200_ROP_COPY_INVERTED,   /* GL_COPY_INVERTED */
+   R200_ROP_OR_INVERTED,     /* GL_OR_INVERTED */
+   R200_ROP_NAND,            /* GL_NAND */
+   R200_ROP_SET,             /* GL_SET */
+};
+
+/* Translate a GL logic opcode into the hardware ROP code via
+ * r200_rop_tab and store it in RB3D_ROPCNTL.  The opcode's distance
+ * from GL_CLEAR is asserted to be below 16.
+ */
+static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint tab_index = (GLuint)opcode - GL_CLEAR;
+
+   ASSERT( tab_index < 16 );
+
+   R200_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = r200_rop_tab[tab_index];
+}
+
+
+/* Point rmesa at the cliprect list for the given buffer.
+ *
+ * GL_FRONT_LEFT always uses the drawable's front cliprects.
+ * GL_BACK_LEFT uses the back cliprects only if there are any and we
+ * are not page flipping; otherwise it uses the front list as well.
+ * Any other mode prints a message and changes nothing.  The scissor
+ * rects are recalculated afterwards when scissoring is enabled.
+ */
+void r200SetCliprects( r200ContextPtr rmesa, GLenum mode )
+{
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   GLboolean use_back_rects;
+
+   if ( mode == GL_FRONT_LEFT ) {
+      use_back_rects = GL_FALSE;
+   }
+   else if ( mode == GL_BACK_LEFT ) {
+      /* Can't ignore 2d windows if we are page flipping.
+       */
+      use_back_rects = ( dPriv->numBackClipRects != 0 &&
+                         !rmesa->doPageFlip );
+   }
+   else {
+      fprintf(stderr, "bad mode in r200SetCliprects\n");
+      return;
+   }
+
+   if ( use_back_rects ) {
+      rmesa->numClipRects = dPriv->numBackClipRects;
+      rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pBackClipRects;
+   }
+   else {
+      rmesa->numClipRects = dPriv->numClipRects;
+      rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
+   }
+
+   if (rmesa->state.scissor.enabled)
+      r200RecalcScissorRects( rmesa );
+}
+
+
+/* Driver hook for draw buffer changes.
+ *
+ * Only a destination mask of exactly FRONT_LEFT_BIT or BACK_LEFT_BIT
+ * is handled here; anything else raises the draw-buffer fallback and
+ * returns early.  For the handled cases the cliprects are switched,
+ * swrast is told about the new buffer, and the colour offset/pitch
+ * registers are reloaded from rmesa->state.color.
+ */
+static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLenum target;
+
+   if (R200_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s %s\n", __FUNCTION__,
+             _mesa_lookup_enum_by_nr( mode ));
+
+   R200_FIREVERTICES(rmesa);   /* don't pipeline cliprect changes */
+
+   /*
+    * _DrawDestMask is easier to cope with than <mode>.
+    */
+   if ( ctx->Color._DrawDestMask == FRONT_LEFT_BIT ) {
+      target = GL_FRONT_LEFT;
+   }
+   else if ( ctx->Color._DrawDestMask == BACK_LEFT_BIT ) {
+      target = GL_BACK_LEFT;
+   }
+   else {
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
+   r200SetCliprects( rmesa, target );
+
+   /* We want to update the s/w rast state too so that r200SetBuffer()
+    * gets called.
+    */
+   _swrast_DrawBuffer(ctx, mode);
+
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] =
+      (rmesa->state.color.drawOffset & R200_COLOROFFSET_MASK);
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
+}
+
+
+/* Driver hook for read buffer changes.  Deliberately empty: neither
+ * ctx nor mode is used.
+ */
+static void r200ReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+}
+
+/* =============================================================
+ * State enable/disable
+ */
+
+/* Driver hook for glEnable/glDisable.
+ *
+ *   cap   - the capability being changed
+ *   state - GL_TRUE when enabling, GL_FALSE when disabling
+ *
+ * Most cases set or clear a single enable bit in one hardware state
+ * atom, marking that atom changed first.  A few delegate to other
+ * routines in this file which re-read the relevant state from ctx.
+ * Capabilities not listed are ignored.
+ */
+static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint p, flag;
+
+   if ( R200_DEBUG & DEBUG_STATE )
+      fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr( cap ),
+              state ? "GL_TRUE" : "GL_FALSE" );
+
+   switch ( cap ) {
+      /* Fast track this one...
+       */
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      break;
+
+   case GL_ALPHA_TEST:
+      R200_STATECHANGE( rmesa, ctx );
+      if (state) {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ALPHA_TEST_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ALPHA_TEST_ENABLE;
+      }
+      break;
+
+   case GL_BLEND:
+      R200_STATECHANGE( rmesa, ctx );
+      if (state) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ALPHA_BLEND_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ALPHA_BLEND_ENABLE;
+      }
+      /* The ROP enable bit is re-synced with the logic-op state on
+       * every blend change as well.
+       */
+      if ( ctx->Color.ColorLogicOpEnabled ) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
+      }
+      break;
+
+   case GL_CLIP_PLANE0:
+   case GL_CLIP_PLANE1:
+   case GL_CLIP_PLANE2:
+   case GL_CLIP_PLANE3:
+   case GL_CLIP_PLANE4:
+   case GL_CLIP_PLANE5: 
+      p = cap-GL_CLIP_PLANE0;
+      R200_STATECHANGE( rmesa, tcl );
+      if (state) {
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0<<p);
+        /* Upload the plane on enable; r200ClipPlane() reads it from
+         * ctx, so no equation needs to be passed.
+         */
+        r200ClipPlane( ctx, cap, NULL );
+      }
+      else {
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0<<p);
+      }
+      break;
+
+   case GL_COLOR_MATERIAL:
+      /* r200ColorMaterial() ignores its face/mode arguments. */
+      r200ColorMaterial( ctx, 0, 0 );
+      if (!state) 
+        r200UpdateMaterial( ctx );
+      break;
+
+   case GL_CULL_FACE:
+      r200CullFace( ctx, 0 );
+      break;
+
+   case GL_DEPTH_TEST:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_Z_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_Z_ENABLE;
+      }
+      break;
+
+   case GL_DITHER:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_DITHER_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
+      }
+      break;
+
+   case GL_FOG:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_FOG_ENABLE;
+        r200Fogfv( ctx, GL_FOG_MODE, 0 );
+      } else {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_FOG_ENABLE;
+        R200_STATECHANGE(rmesa, tcl);
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
+      }
+      r200UpdateSpecular( ctx ); /* for PK_SPEC */
+      break;
+
+   case GL_LIGHT0:
+   case GL_LIGHT1:
+   case GL_LIGHT2:
+   case GL_LIGHT3:
+   case GL_LIGHT4:
+   case GL_LIGHT5:
+   case GL_LIGHT6:
+   case GL_LIGHT7:
+      R200_STATECHANGE(rmesa, tcl);
+      p = cap - GL_LIGHT0;
+      /* Two lights share each PER_LIGHT_CTL word: odd-numbered lights
+       * use the LIGHT_1 flags, even-numbered ones the LIGHT_0 flags.
+       */
+      if (p&1) 
+        flag = (R200_LIGHT_1_ENABLE |
+                R200_LIGHT_1_ENABLE_AMBIENT | 
+                R200_LIGHT_1_ENABLE_SPECULAR);
+      else
+        flag = (R200_LIGHT_0_ENABLE |
+                R200_LIGHT_0_ENABLE_AMBIENT | 
+                R200_LIGHT_0_ENABLE_SPECULAR);
+
+      if (state)
+        rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
+      else
+        rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
+
+      /* Refresh this light's colours; update_light_colors() does
+       * nothing while the light is disabled.
+       */
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_LIGHTING:
+      r200UpdateSpecular(ctx);
+      check_twoside_fallback( ctx );
+      break;
+
+   case GL_LINE_SMOOTH:
+      R200_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  R200_ANTI_ALIAS_LINE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_LINE;
+      }
+      break;
+
+   case GL_LINE_STIPPLE:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_RE_CNTL] |=  R200_PATTERN_ENABLE;
+      } else {
+        rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PATTERN_ENABLE;
+      }
+      break;
+
+   case GL_COLOR_LOGIC_OP:
+      R200_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
+      }
+      break;
+      
+   case GL_NORMALIZE:
+      R200_STATECHANGE( rmesa, tcl );
+      if ( state ) {
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_NORMALIZE_NORMALS;
+      } else {
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_NORMALIZE_NORMALS;
+      }
+      break;
+
+      /* Pointsize registers on r200 don't seem to do anything.  Maybe
+       * have to pass pointsizes as vertex parameters?  In any case,
+       * setting pointmin == pointsizemax == 1.0, and doing nothing
+       * for aa is enough to satisfy conform.
+       */
+   case GL_POINT_SMOOTH:
+      break;
+
+      /* These don't really do anything, as we don't use the 3vtx
+       * primitives yet.
+       */
+#if 0
+   case GL_POLYGON_OFFSET_POINT:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_POINT;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_POINT;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_LINE:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_LINE;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_LINE;
+      }
+      break;
+#endif
+
+   case GL_POLYGON_OFFSET_FILL:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_TRI;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_TRI;
+      }
+      break;
+
+   case GL_POLYGON_SMOOTH:
+      R200_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  R200_ANTI_ALIAS_POLY;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_POLY;
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      R200_STATECHANGE(rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_RE_CNTL] |=  R200_STIPPLE_ENABLE;
+      } else {
+        rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_STIPPLE_ENABLE;
+      }
+      break;
+
+   case GL_RESCALE_NORMAL_EXT: {
+      /* The hardware bit follows 'state' when eye coordinates are
+       * needed and is inverted otherwise, using the same rule as
+       * r200LightingSpaceChange().
+       */
+      GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;
+      R200_STATECHANGE( rmesa, tcl );
+      if ( tmp ) {
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_RESCALE_NORMALS;
+      } else {
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
+      }
+      break;
+   }
+
+   case GL_SCISSOR_TEST:
+      R200_FIREVERTICES( rmesa );
+      rmesa->state.scissor.enabled = state;
+      r200UpdateScissor( ctx );
+      break;
+
+   case GL_STENCIL_TEST:
+      /* Without a hardware stencil buffer, enabling the stencil test
+       * raises the stencil fallback instead.
+       */
+      if ( rmesa->state.stencil.hwBuffer ) {
+        R200_STATECHANGE( rmesa, ctx );
+        if ( state ) {
+           rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_STENCIL_ENABLE;
+        } else {
+           rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE;
+        }
+      } else {
+        FALLBACK( rmesa, R200_FALLBACK_STENCIL, state );
+      }
+      break;
+
+   case GL_TEXTURE_GEN_Q:
+   case GL_TEXTURE_GEN_R:
+   case GL_TEXTURE_GEN_S:
+   case GL_TEXTURE_GEN_T:
+      /* Picked up in r200UpdateTextureState.
+       */
+      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; 
+      break;
+
+   case GL_COLOR_SUM_EXT:
+      r200UpdateSpecular ( ctx );
+      break;
+
+   default:
+      return;
+   }
+}
+
+
+/* Driver hook: re-derive the hardware RESCALE_NORMALS bit from
+ * ctx->_NeedEyeCoords and ctx->Transform.RescaleNormals.
+ *
+ * The bit is set when eye coordinates are needed and rescaling is
+ * requested, or when neither is the case; otherwise it is cleared.
+ * With DEBUG_STATE set in R200_DEBUG the LIGHT_MODEL_CTL_0 word is
+ * printed before and after the update.
+ */
+void r200LightingSpaceChange( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLboolean rescale = ctx->_NeedEyeCoords
+      ? ctx->Transform.RescaleNormals
+      : !ctx->Transform.RescaleNormals;
+
+   if (R200_DEBUG & DEBUG_STATE) {
+      fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+              rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
+   }
+
+   /* Flag the tcl atom before touching its command word. */
+   R200_STATECHANGE( rmesa, tcl );
+   if ( rescale )
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_RESCALE_NORMALS;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
+
+   if (R200_DEBUG & DEBUG_STATE) {
+      fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+              rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
+   }
+}
+
+/* =============================================================
+ * Deferred state management - matrices, textures, other?
+ */
+
+
+
+
+/* Store the 4x4 matrix 'src' into the state storage obtained from
+ * R200_DB_STATE( mat[idx] ), transposed: destination element 4*i+j
+ * receives src[i + 4*j].  The atom is then flagged through
+ * R200_DB_STATECHANGE.
+ *
+ * 'src' is only read, so it is const-qualified like the source
+ * argument of upload_matrix_t().
+ *
+ * rmesa - driver context (also referenced by name inside R200_DB_STATE,
+ *         presumably -- the macro is not visible here)
+ * src   - 16 floats
+ * idx   - index into rmesa->hw.mat[]
+ */
+static void upload_matrix( r200ContextPtr rmesa, const GLfloat *src, int idx )
+{
+   float *dest = ((float *)R200_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   int i;
+
+   /* Each pass gathers src[i], src[i+4], src[i+8], src[i+12] into four
+    * consecutive destination floats.
+    */
+   for (i = 0 ; i < 4 ; i++) {
+      *dest++ = src[i];
+      *dest++ = src[i+4];
+      *dest++ = src[i+8];
+      *dest++ = src[i+12];
+   }
+
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+/* Store the 16 floats at 'src' into the state storage obtained from
+ * R200_DB_STATE( mat[idx] ) in their existing order (no transpose,
+ * unlike upload_matrix), then flag the atom via R200_DB_STATECHANGE.
+ */
+static void upload_matrix_t( r200ContextPtr rmesa, const GLfloat *src, int idx )
+{
+   float *dest = (float *)R200_DB_STATE( mat[idx] );
+
+   dest += MAT_ELT_0;
+   memcpy( dest, src, 16 * sizeof(*dest) );
+
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+
+/* Recompute the texture-matrix related state for texture units 0 and 1.
+ *
+ * For every really-enabled unit whose current texture matrix is not the
+ * identity, the texmat enable bits and the TEX output bit for that unit
+ * are accumulated in rmesa->TexMatEnabled / rmesa->TexMatCompSel and a
+ * matrix is uploaded to R200_MTX_TEX0+unit.  When texgen is flagged for
+ * the unit in rmesa->TexGenEnabled, the texgen matrix is combined with
+ * (or used instead of) the texture matrix.
+ *
+ * Afterwards TCG_TEX_PROC_CTL_0/1 and VTX_TCL_OUTPUT_COMPSEL are
+ * rewritten, but only if their values actually change.
+ */
+static void update_texturematrix( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   /* NOTE: this initial value of tpc is overwritten below before use. */
+   GLuint tpc = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0];
+   GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL];
+   int unit;
+
+   if (R200_DEBUG & DEBUG_STATE) 
+      fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__,
+             rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]);
+
+   /* Start from scratch; the loop below re-adds bits per unit. */
+   rmesa->TexMatEnabled = 0;
+   rmesa->TexMatCompSel = 0;
+
+   /* Only two texture units are examined here. */
+   for (unit = 0 ; unit < 2; unit++) {
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled) 
+        continue;
+
+      if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
+        /* The unit-0 bit definitions are shifted by 'unit' to address
+         * the other unit.
+         */
+        rmesa->TexMatEnabled |= (R200_TEXGEN_TEXMAT_0_ENABLE|
+                                 R200_TEXMAT_0_ENABLE) << unit;
+
+        rmesa->TexMatCompSel |= R200_OUTPUT_TEX_0 << unit;
+
+        if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
+           /* Need to preconcatenate any active texgen 
+            * obj/eyeplane matrices:
+            */
+           _math_matrix_mul_matrix( &rmesa->tmpmat, 
+                                    &rmesa->TexGenMatrix[unit],
+                                    ctx->TextureMatrixStack[unit].Top );
+           upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit );
+        } 
+        else {
+           upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, 
+                          R200_MTX_TEX0+unit );
+        }
+      }
+      else if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
+        /* Identity texture matrix: the texgen matrix alone is uploaded. */
+        upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, 
+                       R200_MTX_TEX0+unit );
+      }
+   }
+
+   /* Only flag the tcg atom when one of its two words changes. */
+   tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
+   if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] ||
+       rmesa->TexGenInputs != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1]) {
+      R200_STATECHANGE(rmesa, tcg);
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = tpc;
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = rmesa->TexGenInputs;
+   }
+
+   /* Replace the texture output bits of COMPSEL with the union of the
+    * texmat and texgen selections; other bits are preserved.
+    */
+   compsel &= ~R200_OUTPUT_TEX_MASK;
+   compsel |= rmesa->TexMatCompSel | rmesa->TexGenCompSel;
+   if (compsel != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]) {
+      R200_STATECHANGE(rmesa, vtx);
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = compsel;
+   }
+}
+
+
+
+/* Bring the deferred driver state up to date with the GL state flags
+ * accumulated in rmesa->NewGLState (see r200InvalidateState), then
+ * clear those flags.
+ *
+ * Depending on which _NEW_* bits are set this updates texture state,
+ * uploads the MVP / modelview / inverse-modelview matrices, refreshes
+ * the texture matrices, lighting and user clip planes.
+ */
+void r200ValidateState( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint new_state = rmesa->NewGLState;
+
+   /* Texture state goes first: it may raise further flags in
+    * rmesa->NewGLState, which are merged into the local copy.
+    */
+   if (new_state & _NEW_TEXTURE) {
+      r200UpdateTextureState( ctx );
+      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+   }
+
+   /* Need an event driven matrix update?
+    */
+   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
+      upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, R200_MTX_MVP );
+
+   /* Need these for lighting (shouldn't upload otherwise)
+    */
+   if (new_state & (_NEW_MODELVIEW)) {
+      upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, R200_MTX_MV );
+      /* The inverse goes through upload_matrix_t, i.e. without the
+       * transpose that upload_matrix applies.
+       */
+      upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, R200_MTX_IMV );
+   }
+
+   /* Does this need to be triggered on eg. modelview for
+    * texgen-derived objplane/eyeplane matrices?
+    */
+   if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
+      update_texturematrix( ctx );
+   }      
+
+   if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
+      update_light( ctx );
+   }
+
+   /* emit all active clip planes if projection matrix changes.
+    */
+   if (new_state & (_NEW_PROJECTION)) {
+      if (ctx->Transform.ClipPlanesEnabled) 
+        r200UpdateClipPlanes( ctx );
+   }
+
+
+   /* Everything pending has been handled. */
+   rmesa->NewGLState = 0;
+}
+
+
+/* ctx->Driver.UpdateState hook: forward the changed-state bits to the
+ * swrast, swsetup, array-cache, tnl and array-element modules, record
+ * them in rmesa->NewGLState for r200ValidateState, and invalidate the
+ * vtxfmt state.
+ */
+static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _ac_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _ae_invalidate_state( ctx, new_state );
+
+   rmesa->NewGLState |= new_state;
+
+   r200VtxfmtInvalidate( ctx );
+}
+
+/* TNL RunPipeline hook (installed in r200InitStateFuncs).
+ *
+ * Validates any pending driver state, then runs the tnl pipeline.
+ * While the current vertex buffer carries material changes
+ * (tnl->vb.Material), the R200_TCL_FALLBACK_MATERIAL fallback is
+ * raised for the duration of the pipeline run and dropped afterwards.
+ */
+static void r200WrapRunPipeline( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   /* Compiled-out debug trace. */
+   if (0)
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
+
+   /* Validate state:
+    */
+   if (rmesa->NewGLState)
+      r200ValidateState( ctx );
+
+   if (tnl->vb.Material) {
+      TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_TRUE );
+   }
+
+   /* Run the pipeline.
+    */ 
+   _tnl_run_pipeline( ctx );
+
+   /* Undo the temporary fallback and resynchronise material state. */
+   if (tnl->vb.Material) {
+      TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_FALSE );
+      r200UpdateMaterial( ctx ); /* not needed any more? */
+   }
+}
+
+
+/* Initialize the driver's state functions.
+ *
+ * Plugs the r200 state callbacks into ctx->Driver, routes the imaging
+ * copy hooks to swrast, and installs the material-change and
+ * run-pipeline hooks in the tnl context.  Entries set to NULL are
+ * callbacks this driver does not provide.
+ */
+void r200InitStateFuncs( GLcontext *ctx )
+{
+   /* State invalidation / lighting space.
+    */
+   ctx->Driver.UpdateState = r200InvalidateState;
+   ctx->Driver.LightingSpaceChange = r200LightingSpaceChange;
+
+   /* Buffer selection.
+    */
+   ctx->Driver.DrawBuffer = r200DrawBuffer;
+   ctx->Driver.ReadBuffer = r200ReadBuffer;
+
+   /* Per-state-change callbacks.
+    */
+   ctx->Driver.AlphaFunc = r200AlphaFunc;
+   ctx->Driver.BlendEquation = r200BlendEquation;
+   ctx->Driver.BlendFunc = r200BlendFunc;
+   ctx->Driver.BlendFuncSeparate = r200BlendFuncSeparate;
+   ctx->Driver.ClearColor = r200ClearColor;
+   ctx->Driver.ClearDepth = NULL;
+   ctx->Driver.ClearIndex = NULL;
+   ctx->Driver.ClearStencil = r200ClearStencil;
+   ctx->Driver.ClipPlane = r200ClipPlane;
+   ctx->Driver.ColorMask = r200ColorMask;
+   ctx->Driver.CullFace = r200CullFace;
+   ctx->Driver.DepthFunc = r200DepthFunc;
+   ctx->Driver.DepthMask = r200DepthMask;
+   ctx->Driver.DepthRange = r200DepthRange;
+   ctx->Driver.Enable = r200Enable;
+   ctx->Driver.Fogfv = r200Fogfv;
+   ctx->Driver.FrontFace = r200FrontFace;
+   ctx->Driver.Hint = NULL;
+   ctx->Driver.IndexMask = NULL;
+   ctx->Driver.LightModelfv = r200LightModelfv;
+   ctx->Driver.Lightfv = r200Lightfv;
+   ctx->Driver.LineStipple = r200LineStipple;
+   ctx->Driver.LineWidth = r200LineWidth;
+   ctx->Driver.LogicOpcode = r200LogicOpCode;
+   ctx->Driver.PolygonMode = r200PolygonMode;
+   ctx->Driver.PolygonOffset = r200PolygonOffset;
+   ctx->Driver.PolygonStipple = r200PolygonStipple;
+   ctx->Driver.PointSize = r200PointSize;
+   ctx->Driver.RenderMode = r200RenderMode;
+   ctx->Driver.Scissor = r200Scissor;
+   ctx->Driver.ShadeModel = r200ShadeModel;
+   ctx->Driver.StencilFunc = r200StencilFunc;
+   ctx->Driver.StencilMask = r200StencilMask;
+   ctx->Driver.StencilOp = r200StencilOp;
+   ctx->Driver.Viewport = r200Viewport;
+
+   /* Swrast hooks for imaging extensions:
+    */
+   ctx->Driver.CopyColorTable = _swrast_CopyColorTable;
+   ctx->Driver.CopyColorSubTable = _swrast_CopyColorSubTable;
+   ctx->Driver.CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
+   ctx->Driver.CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
+
+   /* TNL hooks.
+    */
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = r200UpdateMaterial;
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = r200WrapRunPipeline;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_state.c~ b/src/mesa/drivers/dri/r200/r200_state.c~
new file mode 100644 (file)
index 0000000..f025a88
--- /dev/null
@@ -0,0 +1,2168 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "api_arrayelt.h"
+#include "enums.h"
+#include "colormac.h"
+
+#include "swrast/swrast.h"
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_tcl.h"
+#include "r200_tex.h"
+#include "r200_swtcl.h"
+#include "r200_vtxfmt.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+/* Program the alpha test: the comparison selected by 'func' and the
+ * reference value 'ref' (converted to a ubyte) are packed into the
+ * CTX_PP_MISC word.  A 'func' value not listed below leaves the
+ * test-op field cleared.
+ */
+static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
+   int test_op = 0;
+   GLubyte refByte;
+
+   CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
+
+   switch ( func ) {
+   case GL_NEVER:    test_op = R200_ALPHA_TEST_FAIL;    break;
+   case GL_LESS:     test_op = R200_ALPHA_TEST_LESS;    break;
+   case GL_EQUAL:    test_op = R200_ALPHA_TEST_EQUAL;   break;
+   case GL_LEQUAL:   test_op = R200_ALPHA_TEST_LEQUAL;  break;
+   case GL_GREATER:  test_op = R200_ALPHA_TEST_GREATER; break;
+   case GL_NOTEQUAL: test_op = R200_ALPHA_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   test_op = R200_ALPHA_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   test_op = R200_ALPHA_TEST_PASS;    break;
+   }
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   /* Replace both the test-op and the reference fields. */
+   pp_misc &= ~(R200_ALPHA_TEST_OP_MASK | R200_REF_ALPHA_MASK);
+   pp_misc |= (refByte & R200_REF_ALPHA_MASK);
+   pp_misc |= test_op;
+
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
+}
+
+/* Program the blend combine function for 'mode' into
+ * CTX_RB3D_BLENDCNTL and set or clear R200_ROP_ENABLE in CTX_RB3D_CNTL
+ * according to ctx->Color.ColorLogicOpEnabled.  GL_LOGIC_OP is treated
+ * like GL_FUNC_ADD; an unlisted mode leaves the combine field cleared.
+ */
+static void r200BlendEquation( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~R200_COMB_FCN_MASK;
+   GLuint fcn = 0;
+
+   switch ( mode ) {
+   case GL_FUNC_ADD:
+   case GL_LOGIC_OP:              fcn = R200_COMB_FCN_ADD_CLAMP;  break;
+   case GL_FUNC_SUBTRACT:         fcn = R200_COMB_FCN_SUB_CLAMP;  break;
+   case GL_FUNC_REVERSE_SUBTRACT: fcn = R200_COMB_FCN_RSUB_CLAMP; break;
+   case GL_MIN:                   fcn = R200_COMB_FCN_MIN;        break;
+   case GL_MAX:                   fcn = R200_COMB_FCN_MAX;        break;
+   default:                                                       break;
+   }
+
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b | fcn;
+
+   if ( !ctx->Color.ColorLogicOpEnabled )
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
+}
+
+/* Program the source and destination blend factors into
+ * CTX_RB3D_BLENDCNTL.
+ *
+ * The factors are taken from ctx->Color.BlendSrcRGB / BlendDstRGB, not
+ * from the 'sfactor' / 'dfactor' arguments.  A factor that is not
+ * listed leaves the corresponding field cleared.
+ */
+static void r200BlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] &
+      ~(R200_SRC_BLEND_MASK | R200_DST_BLEND_MASK);
+   GLuint src = 0;
+   GLuint dst = 0;
+
+   switch ( ctx->Color.BlendSrcRGB ) {
+   case GL_ZERO:                     src = R200_SRC_BLEND_GL_ZERO; break;
+   case GL_ONE:                      src = R200_SRC_BLEND_GL_ONE; break;
+   case GL_DST_COLOR:                src = R200_SRC_BLEND_GL_DST_COLOR; break;
+   case GL_ONE_MINUS_DST_COLOR:      src = R200_SRC_BLEND_GL_ONE_MINUS_DST_COLOR; break;
+   case GL_SRC_COLOR:                src = R200_SRC_BLEND_GL_SRC_COLOR; break;
+   case GL_ONE_MINUS_SRC_COLOR:      src = R200_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR; break;
+   case GL_SRC_ALPHA:                src = R200_SRC_BLEND_GL_SRC_ALPHA; break;
+   case GL_ONE_MINUS_SRC_ALPHA:      src = R200_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA; break;
+   case GL_DST_ALPHA:                src = R200_SRC_BLEND_GL_DST_ALPHA; break;
+   case GL_ONE_MINUS_DST_ALPHA:      src = R200_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA; break;
+   case GL_SRC_ALPHA_SATURATE:       src = R200_SRC_BLEND_GL_SRC_ALPHA_SATURATE; break;
+   case GL_CONSTANT_COLOR:           src = R200_SRC_BLEND_GL_CONST_COLOR; break;
+   case GL_ONE_MINUS_CONSTANT_COLOR: src = R200_SRC_BLEND_GL_ONE_MINUS_CONST_COLOR; break;
+   case GL_CONSTANT_ALPHA:           src = R200_SRC_BLEND_GL_CONST_ALPHA; break;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: src = R200_SRC_BLEND_GL_ONE_MINUS_CONST_ALPHA; break;
+   default:                          break;
+   }
+
+   switch ( ctx->Color.BlendDstRGB ) {
+   case GL_ZERO:                     dst = R200_DST_BLEND_GL_ZERO; break;
+   case GL_ONE:                      dst = R200_DST_BLEND_GL_ONE; break;
+   case GL_SRC_COLOR:                dst = R200_DST_BLEND_GL_SRC_COLOR; break;
+   case GL_ONE_MINUS_SRC_COLOR:      dst = R200_DST_BLEND_GL_ONE_MINUS_SRC_COLOR; break;
+   case GL_SRC_ALPHA:                dst = R200_DST_BLEND_GL_SRC_ALPHA; break;
+   case GL_ONE_MINUS_SRC_ALPHA:      dst = R200_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA; break;
+   case GL_DST_COLOR:                dst = R200_DST_BLEND_GL_DST_COLOR; break;
+   case GL_ONE_MINUS_DST_COLOR:      dst = R200_DST_BLEND_GL_ONE_MINUS_DST_COLOR; break;
+   case GL_DST_ALPHA:                dst = R200_DST_BLEND_GL_DST_ALPHA; break;
+   case GL_ONE_MINUS_DST_ALPHA:      dst = R200_DST_BLEND_GL_ONE_MINUS_DST_ALPHA; break;
+   case GL_CONSTANT_COLOR:           dst = R200_DST_BLEND_GL_CONST_COLOR; break;
+   case GL_ONE_MINUS_CONSTANT_COLOR: dst = R200_DST_BLEND_GL_ONE_MINUS_CONST_COLOR; break;
+   case GL_CONSTANT_ALPHA:           dst = R200_DST_BLEND_GL_CONST_ALPHA; break;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: dst = R200_DST_BLEND_GL_ONE_MINUS_CONST_ALPHA; break;
+   default:                          break;
+   }
+
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b | src | dst;
+}
+
+/* Separate RGB/alpha blend factors: only the RGB pair is forwarded to
+ * r200BlendFunc; the alpha factors are ignored here.
+ */
+static void r200BlendFuncSeparate( GLcontext *ctx,
+                                   GLenum sfactorRGB, GLenum dfactorRGB,
+                                   GLenum sfactorA, GLenum dfactorA )
+{
+   (void) sfactorA;
+   (void) dfactorA;
+
+   r200BlendFunc( ctx, sfactorRGB, dfactorRGB );
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+/* Program the depth comparison into CTX_RB3D_ZSTENCILCNTL.
+ *
+ * The function is read from ctx->Depth.Func rather than from the
+ * 'func' argument.  An unlisted value leaves the Z-test field cleared.
+ */
+static void r200DepthFunc( GLcontext *ctx, GLenum func )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint ztest = 0;
+
+   switch ( ctx->Depth.Func ) {
+   case GL_NEVER:    ztest = R200_Z_TEST_NEVER;   break;
+   case GL_LESS:     ztest = R200_Z_TEST_LESS;    break;
+   case GL_EQUAL:    ztest = R200_Z_TEST_EQUAL;   break;
+   case GL_LEQUAL:   ztest = R200_Z_TEST_LEQUAL;  break;
+   case GL_GREATER:  ztest = R200_Z_TEST_GREATER; break;
+   case GL_NOTEQUAL: ztest = R200_Z_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   ztest = R200_Z_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   ztest = R200_Z_TEST_ALWAYS;  break;
+   }
+
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_Z_TEST_MASK;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= ztest;
+}
+
+
+/* Enable or disable depth writes in CTX_RB3D_ZSTENCILCNTL.  The value
+ * is read from ctx->Depth.Mask rather than from the 'flag' argument.
+ */
+static void r200DepthMask( GLcontext *ctx, GLboolean flag )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   if ( !ctx->Depth.Mask )
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_Z_WRITE_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |=  R200_Z_WRITE_ENABLE;
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+
+/* Fog parameter callback.
+ *
+ * Depending on 'pname' this updates the TCL fog mode bits, the packed
+ * fog colour in CTX_PP_FOG_COLOR, and/or the two fog constants FOG_C
+ * and FOG_D.  All values are read from ctx->Fog; 'param' is not used.
+ *
+ * The constants written are:
+ *   GL_LINEAR: c = End/(End-Start), d = -1/(End-Start)
+ *              (c = d = 1 when Start == End, avoiding a divide by zero)
+ *   GL_EXP:    c = 0, d = -Density
+ *   GL_EXP2:   c = 0, d = -(Density*Density)
+ *
+ * The fog atom is only flagged when c or d actually change.
+ */
+static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* c and d are accessed both as floats and as raw command words. */
+   union { int i; float f; } c, d;
+   GLchan col[4];
+   GLuint i;
+
+   /* Start from the current hardware values so that an untouched
+    * constant compares equal below.
+    */
+   c.i = rmesa->hw.fog.cmd[FOG_C];
+   d.i = rmesa->hw.fog.cmd[FOG_D];
+
+   switch (pname) {
+   case GL_FOG_MODE:
+      /* Nothing is programmed while fog is disabled. */
+      if (!ctx->Fog.Enabled)
+        return;
+      R200_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_LINEAR;
+        if (ctx->Fog.Start == ctx->Fog.End) {
+           c.f = 1.0F;
+           d.f = 1.0F;
+        }
+        else {
+           c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+           d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
+        }
+        break;
+      case GL_EXP:
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP;
+        c.f = 0.0;
+        d.f = -ctx->Fog.Density;
+        break;
+      case GL_EXP2:
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP2;
+        c.f = 0.0;
+        d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+        break;
+      default:
+        /* Unknown mode: the fog mode field stays cleared and the
+         * constants are not written.
+         */
+        return;
+      }
+      break;
+   case GL_FOG_DENSITY:
+      /* Density only matters for the exponential modes. */
+      switch (ctx->Fog.Mode) {
+      case GL_EXP:
+        c.f = 0.0;
+        d.f = -ctx->Fog.Density;
+        break;
+      case GL_EXP2:
+        c.f = 0.0;
+        d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+        break;
+      default:
+        break;
+      }
+      break;
+   case GL_FOG_START:
+   case GL_FOG_END:
+      /* Start/end only matter for linear fog. */
+      if (ctx->Fog.Mode == GL_LINEAR) {
+        if (ctx->Fog.Start == ctx->Fog.End) {
+           c.f = 1.0F;
+           d.f = 1.0F;
+        } else {
+           c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+           d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
+        }
+      }
+      break;
+   case GL_FOG_COLOR: 
+      R200_STATECHANGE( rmesa, ctx );
+      /* Only RGB is converted; the alpha passed to r200PackColor is 0. */
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+      i = r200PackColor( 4, col[0], col[1], col[2], 0 );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
+      break;
+   case GL_FOG_COORDINATE_SOURCE_EXT: 
+      /* What to do?
+       */
+      break;
+   default:
+      return;
+   }
+
+   /* Compare the raw bit patterns so the atom is only flagged on a
+    * real change.
+    */
+   if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
+      R200_STATECHANGE( rmesa, fog );
+      rmesa->hw.fog.cmd[FOG_C] = c.i;
+      rmesa->hw.fog.cmd[FOG_D] = d.i;
+   }
+}
+
+
+/* =============================================================
+ * Scissoring
+ */
+
+
+/* Store the intersection of rectangles 'a' and 'b' in 'out'.
+ *
+ * Returns GL_TRUE when the result is non-empty (x1 < x2 and y1 < y2),
+ * GL_FALSE otherwise; 'out' is written in either case.
+ */
+static GLboolean intersect_rect( XF86DRIClipRectPtr out,
+                                 XF86DRIClipRectPtr a,
+                                 XF86DRIClipRectPtr b )
+{
+   /* Start from 'a' and shrink each edge to lie within 'b'. */
+   *out = *a;
+
+   if ( out->x1 < b->x1 ) out->x1 = b->x1;
+   if ( out->y1 < b->y1 ) out->y1 = b->y1;
+   if ( out->x2 > b->x2 ) out->x2 = b->x2;
+   if ( out->y2 > b->y2 ) out->y2 = b->y2;
+
+   return ( out->x1 < out->x2 && out->y1 < out->y2 ) ? GL_TRUE : GL_FALSE;
+}
+
+
+/* Rebuild rmesa->state.scissor.pClipRects as the intersection of every
+ * drawable cliprect (rmesa->pClipRects[0 .. numClipRects-1]) with the
+ * scissor rectangle rmesa->state.scissor.rect.  Empty intersections
+ * are dropped; rmesa->state.scissor.numClipRects receives the number
+ * of rectangles kept.
+ *
+ * The backing store is grown (never shrunk) when it is too small.  If
+ * that allocation fails the store is left empty: pClipRects is NULL
+ * and both numAllocedClipRects and numClipRects are 0, so the count
+ * never describes rectangles that no longer exist.
+ */
+void r200RecalcScissorRects( r200ContextPtr rmesa )
+{
+   XF86DRIClipRectPtr out;
+   int i;
+
+   /* Grow cliprect store?
+    */
+   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+         rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */
+         rmesa->state.scissor.numAllocedClipRects *= 2;
+      }
+
+      /* Old contents are not needed: every entry is recomputed below. */
+      if (rmesa->state.scissor.pClipRects)
+         FREE(rmesa->state.scissor.pClipRects);
+
+      rmesa->state.scissor.pClipRects =
+         MALLOC( rmesa->state.scissor.numAllocedClipRects *
+                 sizeof(XF86DRIClipRectRec) );
+
+      if ( rmesa->state.scissor.pClipRects == NULL ) {
+         rmesa->state.scissor.numAllocedClipRects = 0;
+         /* Do not leave a stale count paired with a NULL array. */
+         rmesa->state.scissor.numClipRects = 0;
+         return;
+      }
+   }
+
+   out = rmesa->state.scissor.pClipRects;
+   rmesa->state.scissor.numClipRects = 0;
+
+   /* 'out' only advances past a slot when the intersection written
+    * into it is non-empty; otherwise the slot is reused.
+    */
+   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
+      if ( intersect_rect( out,
+                           &rmesa->pClipRects[i],
+                           &rmesa->state.scissor.rect ) ) {
+         rmesa->state.scissor.numClipRects++;
+         out++;
+      }
+   }
+}
+
+
+/* Convert the GL scissor box (ctx->Scissor) into a rectangle offset by
+ * the drawable position and store it in rmesa->state.scissor.rect,
+ * then recompute the scissored cliprects.  The vertical extent is
+ * measured from the opposite edge of the drawable (dPriv->h - ...).
+ * Nothing happens when the context has no drawable.
+ */
+static void r200UpdateScissor( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   int left, top, right, bottom;
+
+   if ( !dPriv )
+      return;
+
+   /* Inclusive edges, relative to the drawable. */
+   left   = ctx->Scissor.X;
+   top    = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
+   right  = ctx->Scissor.X + ctx->Scissor.Width - 1;
+   bottom = dPriv->h - ctx->Scissor.Y - 1;
+
+   /* x2/y2 are stored one past the inclusive edge. */
+   rmesa->state.scissor.rect.x1 = left   + dPriv->x;
+   rmesa->state.scissor.rect.y1 = top    + dPriv->y;
+   rmesa->state.scissor.rect.x2 = right  + dPriv->x + 1;
+   rmesa->state.scissor.rect.y2 = bottom + dPriv->y + 1;
+
+   r200RecalcScissorRects( rmesa );
+}
+
+
+/* Scissor box callback.  The box itself is read from ctx->Scissor by
+ * r200UpdateScissor; the x/y/w/h arguments are not used.  Nothing is
+ * done while the scissor test is disabled.
+ */
+static void r200Scissor( GLcontext *ctx,
+                         GLint x, GLint y, GLsizei w, GLsizei h )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if ( !ctx->Scissor.Enabled )
+      return;
+
+   R200_FIREVERTICES( rmesa );      /* don't pipeline cliprect changes */
+   r200UpdateScissor( ctx );
+}
+
+
+/* =============================================================
+ * Culling
+ */
+
+/* Program face culling from ctx->Polygon.CullFlag / CullFaceMode (the
+ * GLenum argument is unused).
+ *
+ * Two words are kept in sync: the FFACE/BFACE_SOLID bits in SET_SE_CNTL
+ * and the CULL_FRONT/CULL_BACK bits in TCL_UCP_VERT_BLEND_CTL.  Each
+ * atom is only flagged when its word actually changes.
+ */
+static void r200CullFace( GLcontext *ctx, GLenum unused )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Default: both faces solid, nothing culled. */
+   GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]
+      | R200_FFACE_SOLID | R200_BFACE_SOLID;
+   GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL]
+      & ~(R200_CULL_FRONT | R200_CULL_BACK);
+
+   if ( ctx->Polygon.CullFlag ) {
+      const GLenum mode = ctx->Polygon.CullFaceMode;
+
+      if ( mode == GL_FRONT ) {
+         s &= ~R200_FFACE_SOLID;
+         t |= R200_CULL_FRONT;
+      }
+      else if ( mode == GL_BACK ) {
+         s &= ~R200_BFACE_SOLID;
+         t |= R200_CULL_BACK;
+      }
+      else if ( mode == GL_FRONT_AND_BACK ) {
+         s &= ~(R200_FFACE_SOLID | R200_BFACE_SOLID);
+         t |= (R200_CULL_FRONT | R200_CULL_BACK);
+      }
+   }
+
+   if ( s != rmesa->hw.set.cmd[SET_SE_CNTL] ) {
+      R200_STATECHANGE(rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = s;
+   }
+
+   if ( t != rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] ) {
+      R200_STATECHANGE(rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t;
+   }
+}
+
+/* Program the front-face winding.  The direction field in SET_SE_CNTL
+ * and the CULL_FRONT_IS_CCW bit in TCL_UCP_VERT_BLEND_CTL are cleared
+ * first; GL_CW sets only the SE_CNTL direction, GL_CCW sets both.  Any
+ * other 'mode' leaves both cleared.
+ */
+static void r200FrontFace( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   R200_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_FFACE_CULL_DIR_MASK;
+
+   R200_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_CULL_FRONT_IS_CCW;
+
+   if ( mode == GL_CW ) {
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CW;
+   }
+   else if ( mode == GL_CCW ) {
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CCW;
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_CULL_FRONT_IS_CCW;
+   }
+}
+
+/* =============================================================
+ * Point state
+ */
+/* Point size callback.  No hardware state is programmed; the only
+ * statement is a debug trace that is compiled out.
+ */
+static void r200PointSize( GLcontext *ctx, GLfloat size )
+{
+   if (0) {
+      fprintf(stderr, "%s: %f\n", __FUNCTION__, size );
+   }
+}
+
+/* =============================================================
+ * Line state
+ */
+/* Program the line width.  The width written to LIN_SE_LINE_WIDTH is
+ * ctx->Line._Width scaled by 16; the wide-line enable bit in
+ * SET_SE_CNTL is driven by the 'widthf' argument being above 1.0.
+ */
+static void r200LineWidth( GLcontext *ctx, GLfloat widthf )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLboolean wide = widthf > 1.0;
+
+   R200_STATECHANGE( rmesa, lin );
+   R200_STATECHANGE( rmesa, set );
+
+   /* Line width is stored in U6.4 format.
+    */
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= ~0xffff;
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Line._Width * 16.0);
+
+   if ( wide )
+      rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_WIDELINE_ENABLE;
+   else
+      rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_WIDELINE_ENABLE;
+}
+
+/* Program the line stipple: the low 8 bits of 'factor' go to bits
+ * 16..23 of LIN_RE_LINE_PATTERN and the 16-bit 'pattern' to the low
+ * half.
+ */
+static void r200LineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint repeat = ((GLuint)factor & 0xff) << 16;
+   const GLuint bits   = (GLuint)pattern;
+
+   R200_STATECHANGE( rmesa, lin );
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = (repeat | bits);
+}
+
+
+/* =============================================================
+ * Masks
+ */
+/* Program the colour write mask.
+ *
+ * The plane mask is packed from ctx->Color.ColorMask[] at the screen's
+ * bytes-per-pixel.  R200_PLANE_MASK_ENABLE in CTX_RB3D_CNTL is set
+ * when at least one of the r/g/b/a arguments is false.  Each atom is
+ * flagged only when its word changes.
+ */
+static void r200ColorMask( GLcontext *ctx,
+                           GLboolean r, GLboolean g,
+                           GLboolean b, GLboolean a )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint mask = r200PackColor( rmesa->r200Screen->cpp,
+                                ctx->Color.ColorMask[RCOMP],
+                                ctx->Color.ColorMask[GCOMP],
+                                ctx->Color.ColorMask[BCOMP],
+                                ctx->Color.ColorMask[ACOMP] );
+   GLuint flag = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] & ~R200_PLANE_MASK_ENABLE;
+
+   if ( !r || !g || !b || !a )
+      flag |= R200_PLANE_MASK_ENABLE;
+
+   if ( flag != rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] ) {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag;
+   }
+
+   if ( mask != rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] ) {
+      R200_STATECHANGE( rmesa, msk );
+      rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = mask;
+   }
+}
+
+
+/* =============================================================
+ * Polygon state
+ */
+
+/* Program polygon offset.  'factor' is written unchanged and 'units'
+ * is scaled by rmesa->state.depth.scale; both are stored in the zbs
+ * atom as the raw bit patterns of the float values.
+ *
+ * The float -> word reinterpretation goes through a union (as
+ * r200Fogfv does) rather than a *(GLuint *)&f pointer cast, which
+ * violates C's aliasing rules and may be miscompiled by optimising
+ * compilers.
+ */
+static void r200PolygonOffset( GLcontext *ctx,
+                               GLfloat factor, GLfloat units )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   union { GLfloat f; GLuint u; } zfactor, zconstant;
+
+   zfactor.f   = factor;
+   zconstant.f = units * rmesa->state.depth.scale;
+
+/*    factor *= 2; */
+/*    constant *= 2; */
+
+/*    fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
+
+   R200_STATECHANGE( rmesa, zbs );
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = zfactor.u;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = zconstant.u;
+}
+
+/* Upload a new polygon stipple pattern.
+ *
+ * 'mask' is read as 32 GLuint rows.  The rows are stored in reverse
+ * order in rmesa->state.stipple.mask and then handed to the kernel
+ * with the DRM_RADEON_STIPPLE command while the hardware lock is held.
+ * The return value of drmCommandWrite is not checked.
+ */
+static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint *rows = (const GLuint *) mask;
+   drmRadeonStipple stipple;
+   GLuint i;
+
+   /* Must flip pattern upside down.
+    */
+   for ( i = 0 ; i < 32 ; i++ )
+      rmesa->state.stipple.mask[31 - i] = rows[i];
+
+   /* TODO: push this into cmd mechanism
+    */
+   R200_FIREVERTICES( rmesa );
+   LOCK_HARDWARE( rmesa );
+
+   /* FIXME: Use window x,y offsets into stipple RAM.
+    */
+   stipple.mask = rmesa->state.stipple.mask;
+   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE,
+                    &stipple, sizeof(drmRadeonStipple) );
+
+   UNLOCK_HARDWARE( rmesa );
+}
+
+/* Polygon mode callback.  Raises or drops the TCL "unfilled" fallback
+ * according to DD_TRI_UNFILLED in ctx->_TriangleCaps (the face/mode
+ * arguments are not used) and, while any TCL fallback is active,
+ * re-chooses the render and vertex state.
+ */
+static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLboolean unfilled =
+      (ctx->_TriangleCaps & DD_TRI_UNFILLED) ? GL_TRUE : GL_FALSE;
+
+   /* Can't generally do unfilled via tcl, but some good special
+    * cases work.
+    */
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, unfilled );
+
+   if ( !rmesa->TclFallback )
+      return;
+
+   r200ChooseRenderState( ctx );
+   r200ChooseVertexState( ctx );
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+/* Examine lighting and texture state to determine if separate specular
+ * should be enabled.
+ *
+ * The colour-0/colour-1 fields of VTX_TCL_OUTPUT_VTXFMT_0, the two
+ * colour output bits of VTX_TCL_OUTPUT_COMPSEL, the lighting enable /
+ * diffuse-specular-combine bits of TCL_LIGHT_MODEL_CTL_0 and the
+ * specular enable bit of CTX_PP_CNTL are all reset and then rebuilt
+ * from ctx->Light, ctx->Fog.ColorSumEnabled and ctx->Fog.Enabled.
+ */
+static void r200UpdateSpecular( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   CARD32 p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+
+   /* The tcl and vtx atoms are flagged unconditionally; the ctx atom
+    * only when PP_CNTL really changes (see below).
+    */
+   R200_STATECHANGE( rmesa, tcl );
+   R200_STATECHANGE( rmesa, vtx );
+
+   /* Reset to the baseline: no colour outputs, lighting off, specular
+    * off, diffuse and specular combined.
+    */
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_0_SHIFT);
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_1_SHIFT);
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_0;
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_1;
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LIGHTING_ENABLE;
+
+   p &= ~R200_SPECULAR_ENABLE;
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_DIFFUSE_SPECULAR_COMBINE;
+
+
+   /* Case 1: lighting with separate specular -- both colours output,
+    * specular enabled, combine bit cleared.
+    */
+   if (ctx->Light.Enabled &&
+       ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
+         (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));        
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
+      p |=  R200_SPECULAR_ENABLE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= 
+        ~R200_DIFFUSE_SPECULAR_COMBINE;
+   }
+   /* Case 2: lighting with a single colour -- only colour 0 output. */
+   else if (ctx->Light.Enabled) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));        
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
+   /* Case 3: no lighting but colour sum -- both colour formats set and
+    * specular enabled.  NOTE(review): no COMPSEL colour bits are set in
+    * this branch; confirm that is intended.
+    */
+   } else if (ctx->Fog.ColorSumEnabled ) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
+         (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));        
+      p |=  R200_SPECULAR_ENABLE;
+   /* Case 4: neither -- colour 0 format only. */
+   } else {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));        
+   }
+
+   /* With fog enabled, colour 1 is always formatted and output,
+    * whichever case applied above.
+    */
+   if (ctx->Fog.Enabled) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= 
+        ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));        
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
+   }
+
+   if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
+   }
+
+   /* Update vertex/render formats
+    */
+   if (rmesa->TclFallback) { 
+      r200ChooseRenderState( ctx );
+      r200ChooseVertexState( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Materials
+ */
+
+
+/* Recompute the global ambient term.  Called on changes to
+ * colormaterial, material emissive/ambient and
+ * lightmodel.globalambient.
+ */
+static void update_global_ambient( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   float *fcmd = (float *)R200_DB_STATE( glt );
+   float *rgb = &fcmd[GLT_RED];
+   /* Front emissive and ambient source fields of LIGHT_MODEL_CTL_1. */
+   const GLuint source_fields =
+      ((3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+       (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT));
+
+   if (rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] & source_fields) {
+      /* At least one of the two sources is non-zero: only the light
+       * model ambient colour is uploaded.
+       */
+      COPY_3V( rgb, ctx->Light.Model.Ambient );
+   }
+   else {
+      /* Both emissive and ambient are PREMULT (both fields zero):
+       * fold the front material in here, i.e.
+       * emission + model ambient * material ambient.
+       */
+      COPY_3V( rgb, ctx->Light.Material[0].Emission );
+      ACC_SCALE_3V( rgb,
+                    ctx->Light.Model.Ambient,
+                    ctx->Light.Material[0].Ambient );
+   }
+
+   R200_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
+}
+
+/* Update on change to 
+ *    - light[p].colors
+ *    - light[p].enabled
+ *    - material,
+ *    - colormaterial enabled
+ *    - colormaterial bitmask
+ */
+static void update_light_colors( GLcontext *ctx, GLuint p )
+{
+   struct gl_light *light = &ctx->Light.Light[p];
+
+   /* Nothing is uploaded for a disabled light. */
+   if (light->Enabled) {
+      r200ContextPtr rmesa = R200_CONTEXT(ctx);
+      float *fcmd = (float *)R200_DB_STATE( lit[p] );
+      struct gl_material *front = &ctx->Light.Material[0];
+      /* Material components tracked by colormaterial; none when
+       * colormaterial is disabled.
+       */
+      const GLuint tracked = ctx->Light.ColorMaterialEnabled
+         ? ctx->Light.ColorMaterialBitmask
+         : 0;
+
+      /* Ambient: premultiply by the material unless it is tracked. */
+      COPY_4V( &fcmd[LIT_AMBIENT_RED], light->Ambient );
+      if (!(tracked & FRONT_AMBIENT_BIT))
+         SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], front->Ambient );
+
+      /* Diffuse. */
+      COPY_4V( &fcmd[LIT_DIFFUSE_RED], light->Diffuse );
+      if (!(tracked & FRONT_DIFFUSE_BIT))
+         SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], front->Diffuse );
+
+      /* Specular. */
+      COPY_4V( &fcmd[LIT_SPECULAR_RED], light->Specular );
+      if (!(tracked & FRONT_SPECULAR_BIT))
+         SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], front->Specular );
+
+      R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+   }
+}
+
+/* Also fallback for asym colormaterial mode in twoside lighting...
+ */
+static void check_twoside_fallback( GLcontext *ctx )
+{
+   GLboolean need_fallback = GL_FALSE;
+
+   if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+      const GLuint cm_bits = ctx->Light.ColorMaterialBitmask;
+
+      /* Fall back when front and back materials differ, or when
+       * colormaterial tracks a different set of components on the
+       * back face than on the front face (the back bits are compared
+       * against the front bits shifted left by one).
+       */
+      if (memcmp( &ctx->Light.Material[0],
+                  &ctx->Light.Material[1],
+                  sizeof(struct gl_material)) != 0 ||
+          (ctx->Light.ColorMaterialEnabled &&
+           (cm_bits & BACK_MATERIAL_BITS) !=
+           ((cm_bits & FRONT_MATERIAL_BITS) << 1))) {
+         need_fallback = GL_TRUE;
+      }
+   }
+
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_LIGHT_TWOSIDE, need_fallback );
+}
+
+static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   /* face and mode are not read; the tracked components come from
+    * ctx->Light.ColorMaterialBitmask.
+    */
+   if (ctx->Light.ColorMaterialEnabled) {
+      r200ContextPtr rmesa = R200_CONTEXT(ctx);
+      const GLuint tracked = ctx->Light.ColorMaterialBitmask;
+      const GLuint all_sources =
+         ((0xf << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+          (0xf << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
+          (0xf << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
+          (0xf << R200_FRONT_SPECULAR_SOURCE_SHIFT));
+      /* Default to PREMULT: start with every front source cleared. */
+      GLuint ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] & ~all_sources;
+
+      /* Tracked components take their value from vertex colour 0. */
+      if (tracked & FRONT_EMISSION_BIT)
+         ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                  R200_FRONT_EMISSIVE_SOURCE_SHIFT);
+      if (tracked & FRONT_AMBIENT_BIT)
+         ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                  R200_FRONT_AMBIENT_SOURCE_SHIFT);
+      if (tracked & FRONT_DIFFUSE_BIT)
+         ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                  R200_FRONT_DIFFUSE_SOURCE_SHIFT);
+      if (tracked & FRONT_SPECULAR_BIT)
+         ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+                  R200_FRONT_SPECULAR_SOURCE_SHIFT);
+
+      /* Only touch the hardware state, and the per-light and global
+       * ambient values that depend on it, when the sources changed.
+       */
+      if (ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1]) {
+         GLuint light;
+
+         R200_STATECHANGE( rmesa, tcl );
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = ctl1;
+
+         for (light = 0; light < MAX_LIGHTS; light++) {
+            update_light_colors( ctx, light );
+         }
+         update_global_ambient( ctx );
+      }
+   }
+
+   check_twoside_fallback( ctx );
+}
+
+void r200UpdateMaterial( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] );
+   struct gl_material *front = &ctx->Light.Material[0];
+   /* Components to upload: everything, minus whatever colormaterial
+    * is currently tracking.
+    */
+   const GLuint upload = ctx->Light.ColorMaterialEnabled
+      ? ~ctx->Light.ColorMaterialBitmask
+      : ~0u;
+   GLuint light;
+
+   if (R200_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (upload & FRONT_EMISSION_BIT) {
+      fcmd[MTL_EMMISSIVE_RED]   = front->Emission[0];
+      fcmd[MTL_EMMISSIVE_GREEN] = front->Emission[1];
+      fcmd[MTL_EMMISSIVE_BLUE]  = front->Emission[2];
+      fcmd[MTL_EMMISSIVE_ALPHA] = front->Emission[3];
+   }
+
+   if (upload & FRONT_AMBIENT_BIT) {
+      fcmd[MTL_AMBIENT_RED]   = front->Ambient[0];
+      fcmd[MTL_AMBIENT_GREEN] = front->Ambient[1];
+      fcmd[MTL_AMBIENT_BLUE]  = front->Ambient[2];
+      fcmd[MTL_AMBIENT_ALPHA] = front->Ambient[3];
+   }
+
+   if (upload & FRONT_DIFFUSE_BIT) {
+      fcmd[MTL_DIFFUSE_RED]   = front->Diffuse[0];
+      fcmd[MTL_DIFFUSE_GREEN] = front->Diffuse[1];
+      fcmd[MTL_DIFFUSE_BLUE]  = front->Diffuse[2];
+      fcmd[MTL_DIFFUSE_ALPHA] = front->Diffuse[3];
+   }
+
+   if (upload & FRONT_SPECULAR_BIT) {
+      fcmd[MTL_SPECULAR_RED]   = front->Specular[0];
+      fcmd[MTL_SPECULAR_GREEN] = front->Specular[1];
+      fcmd[MTL_SPECULAR_BLUE]  = front->Specular[2];
+      fcmd[MTL_SPECULAR_ALPHA] = front->Specular[3];
+   }
+
+   if (upload & FRONT_SHININESS_BIT)
+      fcmd[MTL_SHININESS] = front->Shininess;
+
+   /* The dependent state is refreshed only when R200_DB_STATECHANGE
+    * returns non-zero.
+    */
+   if (R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[0] )) {
+      for (light = 0; light < MAX_LIGHTS; light++) {
+         update_light_colors( ctx, light );
+      }
+
+      check_twoside_fallback( ctx );
+      update_global_ambient( ctx );
+   }
+   else if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_STATE)) {
+      fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
+   }
+}
+
+/* _NEW_LIGHT
+ * _NEW_MODELVIEW
+ * _MESA_NEW_NEED_EYE_COORDS
+ *
+ * Uses derived state from mesa:
+ *       _VP_inf_norm
+ *       _h_inf_norm
+ *       _Position
+ *       _NormDirection
+ *       _ModelViewInvScale
+ *       _NeedEyeCoords
+ *       _EyeZDir
+ *
+ * which are calculated in light.c and are correct for the current
+ * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
+ * and _MESA_NEW_NEED_EYE_COORDS.  
+ */
+static void update_light( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint p;
+
+   /* Select the lighting space.  The comparison avoids a statechange
+    * when the bit already has the right value.
+    */
+   {
+      const GLuint current = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0];
+      const GLuint wanted = ctx->_NeedEyeCoords
+         ? (current & ~R200_LIGHT_IN_MODELSPACE)
+         : (current | R200_LIGHT_IN_MODELSPACE);
+
+      if (wanted != current) {
+         R200_STATECHANGE( rmesa, tcl );
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = wanted;
+      }
+   }
+
+   /* Eye direction (Z component negated) and rescale factor. */
+   {
+      GLfloat *eye = (GLfloat *)R200_DB_STATE( eye );
+
+      eye[EYE_X] = ctx->_EyeZDir[0];
+      eye[EYE_Y] = ctx->_EyeZDir[1];
+      eye[EYE_Z] = - ctx->_EyeZDir[2];
+      eye[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
+      R200_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
+   }
+
+   if (!ctx->Light.Enabled)
+      return;
+
+   /* Position and direction of every enabled light. */
+   for (p = 0; p < MAX_LIGHTS; p++) {
+      struct gl_light *l = &ctx->Light.Light[p];
+      GLfloat *fcmd;
+
+      if (!l->Enabled)
+         continue;
+
+      fcmd = (GLfloat *)R200_DB_STATE( lit[p] );
+
+      if (l->EyePosition[3] != 0.0) {
+         /* Non-zero w: full position plus negated direction. */
+         COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
+         fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0];
+         fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1];
+         fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2];
+         fcmd[LIT_DIRECTION_W] = 0;
+      }
+      else {
+         /* w == 0: use the _VP_inf_norm and _h_inf_norm vectors. */
+         COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
+         COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
+         fcmd[LIT_POSITION_W] = 0;
+         fcmd[LIT_DIRECTION_W] = 0;
+      }
+
+      R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+   }
+}
+
+static void r200Lightfv( GLcontext *ctx, GLenum light,
+                         GLenum pname, const GLfloat *params )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint p = light - GL_LIGHT0;
+   struct gl_light *l = &ctx->Light.Light[p];
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+   /* Two lights share each PER_LIGHT_CTL word; odd lights use the
+    * LIGHT_1 flags and even lights the LIGHT_0 flags.
+    */
+   const GLuint ctl_idx = TCL_PER_LIGHT_CTL_0 + p/2;
+   const GLboolean odd = (p & 1) ? GL_TRUE : GL_FALSE;
+
+   switch (pname) {
+   case GL_AMBIENT:
+   case GL_DIFFUSE:
+   case GL_SPECULAR:
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_SPOT_DIRECTION:
+      /* picked up in update_light */
+      break;
+
+   case GL_POSITION: {
+      /* positions picked up in update_light, but can do flag here */
+      const GLuint local_bit =
+         odd ? R200_LIGHT_1_IS_LOCAL : R200_LIGHT_0_IS_LOCAL;
+
+      R200_STATECHANGE(rmesa, tcl);
+      if (l->EyePosition[3] == 0.0F)
+         rmesa->hw.tcl.cmd[ctl_idx] &= ~local_bit;
+      else
+         rmesa->hw.tcl.cmd[ctl_idx] |= local_bit;
+      break;
+   }
+
+   case GL_SPOT_EXPONENT:
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_EXPONENT] = params[0];
+      break;
+
+   case GL_SPOT_CUTOFF: {
+      const GLuint spot_bit =
+         odd ? R200_LIGHT_1_IS_SPOT : R200_LIGHT_0_IS_SPOT;
+
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;
+
+      /* A cutoff of exactly 180 clears the spot flag. */
+      R200_STATECHANGE(rmesa, tcl);
+      if (l->SpotCutoff == 180.0F)
+         rmesa->hw.tcl.cmd[ctl_idx] &= ~spot_bit;
+      else
+         rmesa->hw.tcl.cmd[ctl_idx] |= spot_bit;
+      break;
+   }
+
+   case GL_CONSTANT_ATTENUATION:
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_CONST] = params[0];
+      break;
+
+   case GL_LINEAR_ATTENUATION:
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_LINEAR] = params[0];
+      break;
+
+   case GL_QUADRATIC_ATTENUATION:
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_QUADRATIC] = params[0];
+      break;
+
+   default:
+      return;
+   }
+}
+
+                 
+
+
+static void r200LightModelfv( GLcontext *ctx, GLenum pname,
+                              const GLfloat *param )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   /* param is not read; the values are taken from ctx->Light.Model. */
+   switch (pname) {
+   case GL_LIGHT_MODEL_AMBIENT:
+      update_global_ambient( ctx );
+      break;
+
+   case GL_LIGHT_MODEL_LOCAL_VIEWER:
+      R200_STATECHANGE( rmesa, tcl );
+      if (!ctx->Light.Model.LocalViewer)
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
+      else
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
+      break;
+
+   case GL_LIGHT_MODEL_TWO_SIDE:
+      R200_STATECHANGE( rmesa, tcl );
+      if (!ctx->Light.Model.TwoSide)
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LIGHT_TWOSIDE;
+      else
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
+
+      /* Re-evaluate the two-sided lighting fallback, then refresh the
+       * render and vertex state while any TCL fallback is active.
+       */
+      check_twoside_fallback( ctx );
+      if (rmesa->TclFallback) {
+         r200ChooseRenderState( ctx );
+         r200ChooseVertexState( ctx );
+      }
+      break;
+
+   case GL_LIGHT_MODEL_COLOR_CONTROL:
+      r200UpdateSpecular(ctx);
+      break;
+
+   default:
+      break;
+   }
+}
+
+static void r200ShadeModel( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint shade_fields = (R200_DIFFUSE_SHADE_MASK |
+                                R200_ALPHA_SHADE_MASK |
+                                R200_SPECULAR_SHADE_MASK |
+                                R200_FOG_SHADE_MASK);
+   GLuint shade_bits;
+   GLuint se_cntl;
+
+   /* Diffuse, alpha, specular and fog all use the same shading mode;
+    * any mode other than GL_FLAT or GL_SMOOTH is ignored.
+    */
+   if (mode == GL_FLAT) {
+      shade_bits = (R200_DIFFUSE_SHADE_FLAT |
+                    R200_ALPHA_SHADE_FLAT |
+                    R200_SPECULAR_SHADE_FLAT |
+                    R200_FOG_SHADE_FLAT);
+   }
+   else if (mode == GL_SMOOTH) {
+      shade_bits = (R200_DIFFUSE_SHADE_GOURAUD |
+                    R200_ALPHA_SHADE_GOURAUD |
+                    R200_SPECULAR_SHADE_GOURAUD |
+                    R200_FOG_SHADE_GOURAUD);
+   }
+   else {
+      return;
+   }
+
+   se_cntl = (rmesa->hw.set.cmd[SET_SE_CNTL] & ~shade_fields) | shade_bits;
+
+   /* Skip the statechange when the register value is unchanged. */
+   if ( se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL] ) {
+      R200_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+
+/* =============================================================
+ * User clip planes
+ */
+
+/* Upload one user clip plane into its ucp state atom.
+ *
+ * plane selects the atom (GL_CLIP_PLANE0 + p).  eq is not read: the
+ * coefficients are taken from ctx->Transform._ClipUserPlane[p], so
+ * passing NULL for eq is fine.
+ */
+static void r200ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+   GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* View the plane through an integer pointer so the four values are
+    * copied into the command words without a float->int conversion.
+    */
+   GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+   R200_STATECHANGE( rmesa, ucp[p] );
+   rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
+   rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
+   rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
+   rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
+}
+
+static void r200UpdateClipPlanes( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint p;
+
+   /* Re-upload every enabled user clip plane; disabled planes are
+    * left untouched.
+    */
+   for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+      GLint *words;
+
+      if (!(ctx->Transform.ClipPlanesEnabled & (1 << p)))
+         continue;
+
+      /* Integer view of the plane, copied word for word. */
+      words = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+      R200_STATECHANGE( rmesa, ucp[p] );
+      rmesa->hw.ucp[p].cmd[UCP_X] = words[0];
+      rmesa->hw.ucp[p].cmd[UCP_Y] = words[1];
+      rmesa->hw.ucp[p].cmd[UCP_Z] = words[2];
+      rmesa->hw.ucp[p].cmd[UCP_W] = words[3];
+   }
+}
+
+
+/* =============================================================
+ * Stencil
+ */
+
+static void r200StencilFunc( GLcontext *ctx, GLenum func,
+                             GLint ref, GLuint mask )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* func, ref and mask are not read; the values come from
+    * ctx->Stencil (index 0).
+    */
+   GLuint refmask = ((ctx->Stencil.Ref[0] << R200_STENCIL_REF_SHIFT) |
+                     (ctx->Stencil.ValueMask[0] << R200_STENCIL_MASK_SHIFT));
+   GLuint test_bits = 0;
+
+   R200_STATECHANGE( rmesa, ctx );
+   R200_STATECHANGE( rmesa, msk );
+
+   /* Translate the GL compare function.  An unrecognised value leaves
+    * the test field cleared.
+    */
+   switch ( ctx->Stencil.Function[0] ) {
+   case GL_NEVER:    test_bits = R200_STENCIL_TEST_NEVER;   break;
+   case GL_LESS:     test_bits = R200_STENCIL_TEST_LESS;    break;
+   case GL_EQUAL:    test_bits = R200_STENCIL_TEST_EQUAL;   break;
+   case GL_LEQUAL:   test_bits = R200_STENCIL_TEST_LEQUAL;  break;
+   case GL_GREATER:  test_bits = R200_STENCIL_TEST_GREATER; break;
+   case GL_NOTEQUAL: test_bits = R200_STENCIL_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   test_bits = R200_STENCIL_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   test_bits = R200_STENCIL_TEST_ALWAYS;  break;
+   }
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_STENCIL_TEST_MASK;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= test_bits;
+
+   /* Replace the reference value and the value mask. */
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(R200_STENCIL_REF_MASK|
+                                                   R200_STENCIL_VALUE_MASK);
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
+}
+
+static void r200StencilMask( GLcontext *ctx, GLuint mask )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* mask is not read; the write mask comes from ctx->Stencil. */
+   const GLuint write_bits =
+      (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT);
+
+   R200_STATECHANGE( rmesa, msk );
+
+   /* Replace only the write-mask field of STENCILREFMASK. */
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~R200_STENCIL_WRITE_MASK;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= write_bits;
+}
+
+static void r200StencilOp( GLcontext *ctx, GLenum fail,
+                           GLenum zfail, GLenum zpass )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* fail, zfail and zpass are not read; the operations come from
+    * ctx->Stencil (index 0).  The three fields are collected here and
+    * written in one go; an unrecognised operation contributes no bits.
+    */
+   GLuint op_bits = 0;
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   /* Stencil test fails. */
+   switch ( ctx->Stencil.FailFunc[0] ) {
+   case GL_KEEP:          op_bits |= R200_STENCIL_FAIL_KEEP;     break;
+   case GL_ZERO:          op_bits |= R200_STENCIL_FAIL_ZERO;     break;
+   case GL_REPLACE:       op_bits |= R200_STENCIL_FAIL_REPLACE;  break;
+   case GL_INCR:          op_bits |= R200_STENCIL_FAIL_INC;      break;
+   case GL_DECR:          op_bits |= R200_STENCIL_FAIL_DEC;      break;
+   case GL_INCR_WRAP_EXT: op_bits |= R200_STENCIL_FAIL_INC_WRAP; break;
+   case GL_DECR_WRAP_EXT: op_bits |= R200_STENCIL_FAIL_DEC_WRAP; break;
+   case GL_INVERT:        op_bits |= R200_STENCIL_FAIL_INVERT;   break;
+   }
+
+   /* Stencil test passes, depth test fails. */
+   switch ( ctx->Stencil.ZFailFunc[0] ) {
+   case GL_KEEP:          op_bits |= R200_STENCIL_ZFAIL_KEEP;     break;
+   case GL_ZERO:          op_bits |= R200_STENCIL_ZFAIL_ZERO;     break;
+   case GL_REPLACE:       op_bits |= R200_STENCIL_ZFAIL_REPLACE;  break;
+   case GL_INCR:          op_bits |= R200_STENCIL_ZFAIL_INC;      break;
+   case GL_DECR:          op_bits |= R200_STENCIL_ZFAIL_DEC;      break;
+   case GL_INCR_WRAP_EXT: op_bits |= R200_STENCIL_ZFAIL_INC_WRAP; break;
+   case GL_DECR_WRAP_EXT: op_bits |= R200_STENCIL_ZFAIL_DEC_WRAP; break;
+   case GL_INVERT:        op_bits |= R200_STENCIL_ZFAIL_INVERT;   break;
+   }
+
+   /* Stencil and depth tests both pass. */
+   switch ( ctx->Stencil.ZPassFunc[0] ) {
+   case GL_KEEP:          op_bits |= R200_STENCIL_ZPASS_KEEP;     break;
+   case GL_ZERO:          op_bits |= R200_STENCIL_ZPASS_ZERO;     break;
+   case GL_REPLACE:       op_bits |= R200_STENCIL_ZPASS_REPLACE;  break;
+   case GL_INCR:          op_bits |= R200_STENCIL_ZPASS_INC;      break;
+   case GL_DECR:          op_bits |= R200_STENCIL_ZPASS_DEC;      break;
+   case GL_INCR_WRAP_EXT: op_bits |= R200_STENCIL_ZPASS_INC_WRAP; break;
+   case GL_DECR_WRAP_EXT: op_bits |= R200_STENCIL_ZPASS_DEC_WRAP; break;
+   case GL_INVERT:        op_bits |= R200_STENCIL_ZPASS_INVERT;   break;
+   }
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(R200_STENCIL_FAIL_MASK |
+                                                 R200_STENCIL_ZFAIL_MASK |
+                                                 R200_STENCIL_ZPASS_MASK);
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= op_bits;
+}
+
+static void r200ClearStencil( GLcontext *ctx, GLint s )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* s is not read; the clear value comes from ctx->Stencil.Clear. */
+   const GLuint clear_value = (GLuint) ctx->Stencil.Clear;
+   const GLuint value_mask  = (0xff << R200_STENCIL_MASK_SHIFT);
+   const GLuint write_mask  =
+      (ctx->Stencil.WriteMask[0] << R200_STENCIL_WRITEMASK_SHIFT);
+
+   /* Packed clear word: clear value, an all-ones value mask and the
+    * current stencil write mask.
+    */
+   rmesa->state.stencil.clear = clear_value | value_mask | write_mask;
+}
+
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+/*
+ * To correctly position primitives:
+ */
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
+
+/* Recompute the viewport scale/offset registers from the current GL
+ * viewport matrix, the drawable position and the depth scale, and
+ * store them in the vpt state atom.
+ *
+ * Y is flipped (scale and translate negated, offset measured from
+ * dPriv->y + dPriv->h), and SUBPIXEL_X/SUBPIXEL_Y are added to the
+ * X/Y offsets.
+ */
+void r200UpdateWindow( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   GLfloat xoffset = (GLfloat)dPriv->x;
+   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   /* The command words hold the raw bit pattern of each float.  A
+    * union is used to obtain it: dereferencing a GLfloat through a
+    * GLuint pointer breaks the C aliasing rules and may be
+    * miscompiled by an optimising compiler.
+    */
+   union { GLfloat f; GLuint ui; } sx, tx, sy, ty, sz, tz;
+
+   sx.f = v[MAT_SX];
+   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   sy.f = - v[MAT_SY];
+   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+   sz.f = v[MAT_SZ] * rmesa->state.depth.scale;
+   tz.f = v[MAT_TZ] * rmesa->state.depth.scale;
+
+   R200_FIREVERTICES( rmesa );
+   R200_STATECHANGE( rmesa, vpt );
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = sy.ui;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = sz.ui;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui;
+}
+
+
+
+/* Viewport change hook.  x, y, width and height are not read here;
+ * r200UpdateWindow() takes the viewport transform from
+ * ctx->Viewport._WindowMap.
+ */
+static void r200Viewport( GLcontext *ctx, GLint x, GLint y,
+                           GLsizei width, GLsizei height )
+{
+   /* Don't pipeline viewport changes, conflict with window offset
+    * setting below.  Could apply deltas to rescue pipelined viewport
+    * values, or keep the originals hanging around.
+    */
+   R200_FIREVERTICES( R200_CONTEXT(ctx) );
+   r200UpdateWindow( ctx );
+}
+
+/* Depth range change hook.  nearval and farval are not read here;
+ * r200UpdateWindow() recomputes the Z scale and offset from
+ * ctx->Viewport._WindowMap.
+ */
+static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
+                             GLclampd farval )
+{
+   r200UpdateWindow( ctx );
+}
+
+/* Re-derive the state that depends on the drawable's screen position:
+ * the viewport X/Y offsets, the polygon stipple offset and (via
+ * r200UpdateScissor) the scissor.  Scale factors are left alone.
+ */
+void r200UpdateViewportOffset( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   GLfloat xoffset = (GLfloat)dPriv->x;
+   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   /* Union rather than a pointer cast to get at the float's bit
+    * pattern: reading a GLfloat through a GLuint pointer breaks the C
+    * aliasing rules.
+    */
+   union { GLfloat f; GLuint ui; } tx, ty;
+
+   /* Must match r200UpdateWindow(), including the subpixel bias.
+    * Without it the comparison below never matches what
+    * r200UpdateWindow() stored, and the bias is lost on rewrite.
+    */
+   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui ||
+        rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui )
+   {
+      /* Note: this should also modify whatever data the context reset
+       * code uses...
+       */
+      /* Mark the vpt atom changed, as is done for msc below. */
+      R200_STATECHANGE( rmesa, vpt );
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui;
+
+      /* update polygon stipple x/y screen offset */
+      {
+         GLuint stx, sty;
+         GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];
+
+         m &= ~(R200_STIPPLE_X_OFFSET_MASK |
+                R200_STIPPLE_Y_OFFSET_MASK);
+
+         /* add magic offsets, then invert */
+         stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
+         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
+                     & R200_STIPPLE_COORD_MASK);
+
+         m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
+               (sty << R200_STIPPLE_Y_OFFSET_SHIFT));
+
+         if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
+            R200_STATECHANGE( rmesa, msc );
+            rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
+         }
+      }
+   }
+
+   r200UpdateScissor( ctx );
+}
+
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLubyte rgba[4];
+   GLint i;
+
+   /* Convert each component to an unsigned byte. */
+   for (i = 0; i < 4; i++) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[i], c[i]);
+   }
+
+   /* Pack according to the screen's cpp value. */
+   rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp,
+                                             rgba[0], rgba[1],
+                                             rgba[2], rgba[3] );
+}
+
+
+static void r200RenderMode( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Any mode other than GL_RENDER raises the render-mode fallback. */
+   const GLboolean not_render = (mode != GL_RENDER);
+
+   FALLBACK( rmesa, R200_FALLBACK_RENDER_MODE, not_render );
+}
+
+
+/* Hardware ROP codes, indexed by (GL logic op - GL_CLEAR); see
+ * r200LogicOpCode().  The entries follow the order of the GL logic op
+ * enums from GL_CLEAR through GL_SET.
+ */
+static GLuint r200_rop_tab[] = {
+   R200_ROP_CLEAR,
+   R200_ROP_AND,
+   R200_ROP_AND_REVERSE,
+   R200_ROP_COPY,
+   R200_ROP_AND_INVERTED,
+   R200_ROP_NOOP,
+   R200_ROP_XOR,
+   R200_ROP_OR,
+   R200_ROP_NOR,
+   R200_ROP_EQUIV,
+   R200_ROP_INVERT,
+   R200_ROP_OR_REVERSE,
+   R200_ROP_COPY_INVERTED,
+   R200_ROP_OR_INVERTED,
+   R200_ROP_NAND,
+   R200_ROP_SET,
+};
+
+static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Table index: offset of the opcode from GL_CLEAR. */
+   const GLuint index = (GLuint)opcode - GL_CLEAR;
+
+   ASSERT( index < 16 );
+
+   R200_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = r200_rop_tab[index];
+}
+
+
+void r200SetCliprects( r200ContextPtr rmesa, GLenum mode )
+{
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   GLboolean use_back_rects;
+
+   switch ( mode ) {
+   case GL_FRONT_LEFT:
+      use_back_rects = GL_FALSE;
+      break;
+   case GL_BACK_LEFT:
+      /* Can't ignore 2d windows if we are page flipping, so the back
+       * cliprects are only used when there are some and page flipping
+       * is off.
+       */
+      use_back_rects = (dPriv->numBackClipRects != 0 && !rmesa->doPageFlip);
+      break;
+   default:
+      fprintf(stderr, "bad mode in r200SetCliprects\n");
+      return;
+   }
+
+   if (use_back_rects) {
+      rmesa->numClipRects = dPriv->numBackClipRects;
+      rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pBackClipRects;
+   }
+   else {
+      rmesa->numClipRects = dPriv->numClipRects;
+      rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
+   }
+
+   /* Scissor rectangles are recalculated from the new cliprects. */
+   if (rmesa->state.scissor.enabled)
+      r200RecalcScissorRects( rmesa );
+}
+
+
+/* Draw buffer change hook.  Selects front- or back-left cliprects,
+ * raises or clears the draw-buffer fallback, and reloads the colour
+ * offset and pitch in the ctx state atom.
+ */
+static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s %s\n", __FUNCTION__,
+             _mesa_lookup_enum_by_nr( mode ));
+
+   R200_FIREVERTICES(rmesa);   /* don't pipeline cliprect changes */
+
+   /*
+    * _DrawDestMask is easier to cope with than <mode>.
+    */
+   switch ( ctx->Color._DrawDestMask ) {
+   case FRONT_LEFT_BIT:
+      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      r200SetCliprects( rmesa, GL_FRONT_LEFT );
+      break;
+   case BACK_LEFT_BIT:
+      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      r200SetCliprects( rmesa, GL_BACK_LEFT );
+      break;
+   default:
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      /* Returns early: cliprects, swrast state and the colour
+       * offset/pitch are left as they were.
+       */
+      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   /* We want to update the s/w rast state too so that r200SetBuffer()
+    * gets called.
+    */
+   _swrast_DrawBuffer(ctx, mode);
+
+   /* Load the draw offset (masked with R200_COLOROFFSET_MASK) and
+    * pitch from rmesa->state.color into the ctx atom.
+    */
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = (rmesa->state.color.drawOffset &
+                                           R200_COLOROFFSET_MASK);
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
+}
+
+
+/* Read buffer change hook.  Intentionally empty: ctx and mode are
+ * ignored.
+ */
+static void r200ReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+}
+
+/* =============================================================
+ * State enable/disable
+ */
+
+static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint p, flag;
+
+   if ( R200_DEBUG & DEBUG_STATE )
+      fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr( cap ),
+              state ? "GL_TRUE" : "GL_FALSE" );
+
+   switch ( cap ) {
+      /* Fast track this one...
+       */
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      break;
+
+   case GL_ALPHA_TEST:
+      R200_STATECHANGE( rmesa, ctx );
+      if (state) {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ALPHA_TEST_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ALPHA_TEST_ENABLE;
+      }
+      break;
+
+   case GL_BLEND:
+      R200_STATECHANGE( rmesa, ctx );
+      if (state) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ALPHA_BLEND_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ALPHA_BLEND_ENABLE;
+      }
+      if ( ctx->Color.ColorLogicOpEnabled ) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
+      }
+      break;
+
+   case GL_CLIP_PLANE0:
+   case GL_CLIP_PLANE1:
+   case GL_CLIP_PLANE2:
+   case GL_CLIP_PLANE3:
+   case GL_CLIP_PLANE4:
+   case GL_CLIP_PLANE5: 
+      p = cap-GL_CLIP_PLANE0;
+      R200_STATECHANGE( rmesa, tcl );
+      if (state) {
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0<<p);
+        r200ClipPlane( ctx, cap, NULL );
+      }
+      else {
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0<<p);
+      }
+      break;
+
+   case GL_COLOR_MATERIAL:
+      r200ColorMaterial( ctx, 0, 0 );
+      if (!state) 
+        r200UpdateMaterial( ctx );
+      break;
+
+   case GL_CULL_FACE:
+      r200CullFace( ctx, 0 );
+      break;
+
+   case GL_DEPTH_TEST:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_Z_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_Z_ENABLE;
+      }
+      break;
+
+   case GL_DITHER:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_DITHER_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
+      }
+      break;
+
+   case GL_FOG:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_FOG_ENABLE;
+        r200Fogfv( ctx, GL_FOG_MODE, 0 );
+      } else {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_FOG_ENABLE;
+        R200_STATECHANGE(rmesa, tcl);
+        rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
+      }
+      r200UpdateSpecular( ctx ); /* for PK_SPEC */
+      break;
+
+   case GL_LIGHT0:
+   case GL_LIGHT1:
+   case GL_LIGHT2:
+   case GL_LIGHT3:
+   case GL_LIGHT4:
+   case GL_LIGHT5:
+   case GL_LIGHT6:
+   case GL_LIGHT7:
+      R200_STATECHANGE(rmesa, tcl);
+      p = cap - GL_LIGHT0;
+      if (p&1) 
+        flag = (R200_LIGHT_1_ENABLE |
+                R200_LIGHT_1_ENABLE_AMBIENT | 
+                R200_LIGHT_1_ENABLE_SPECULAR);
+      else
+        flag = (R200_LIGHT_0_ENABLE |
+                R200_LIGHT_0_ENABLE_AMBIENT | 
+                R200_LIGHT_0_ENABLE_SPECULAR);
+
+      if (state)
+        rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
+      else
+        rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
+
+      /* 
+       */
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_LIGHTING:
+      r200UpdateSpecular(ctx);
+      check_twoside_fallback( ctx );
+      break;
+
+   case GL_LINE_SMOOTH:
+      R200_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  R200_ANTI_ALIAS_LINE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_LINE;
+      }
+      break;
+
+   case GL_LINE_STIPPLE:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_RE_CNTL] |=  R200_PATTERN_ENABLE;
+      } else {
+        rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PATTERN_ENABLE;
+      }
+      break;
+
+   case GL_COLOR_LOGIC_OP:
+      R200_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_ROP_ENABLE;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_ROP_ENABLE;
+      }
+      break;
+      
+   case GL_NORMALIZE:
+      R200_STATECHANGE( rmesa, tcl );
+      if ( state ) {
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_NORMALIZE_NORMALS;
+      } else {
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_NORMALIZE_NORMALS;
+      }
+      break;
+
+      /* Pointsize registers on r200 don't seem to do anything.  Maybe
+       * have to pass pointsizes as vertex parameters?  In any case,
+       * setting pointmin == pointsizemax == 1.0, and doing nothing
+       * for aa is enough to satisfy conform.
+       */
+   case GL_POINT_SMOOTH:
+      break;
+
+      /* These don't really do anything, as we don't use the 3vtx
+       * primitives yet.
+       */
+#if 0
+   case GL_POLYGON_OFFSET_POINT:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_POINT;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_POINT;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_LINE:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_LINE;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_LINE;
+      }
+      break;
+#endif
+
+   case GL_POLYGON_OFFSET_FILL:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_TRI;
+      } else {
+        rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_TRI;
+      }
+      break;
+
+   case GL_POLYGON_SMOOTH:
+      R200_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  R200_ANTI_ALIAS_POLY;
+      } else {
+        rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_POLY;
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      R200_STATECHANGE(rmesa, set );
+      if ( state ) {
+        rmesa->hw.set.cmd[SET_RE_CNTL] |=  R200_STIPPLE_ENABLE;
+      } else {
+        rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_STIPPLE_ENABLE;
+      }
+      break;
+
+   case GL_RESCALE_NORMAL_EXT: {
+      GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;
+      R200_STATECHANGE( rmesa, tcl );
+      if ( tmp ) {
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_RESCALE_NORMALS;
+      } else {
+        rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
+      }
+      break;
+   }
+
+   case GL_SCISSOR_TEST:
+      R200_FIREVERTICES( rmesa );
+      rmesa->state.scissor.enabled = state;
+      r200UpdateScissor( ctx );
+      break;
+
+   case GL_STENCIL_TEST:
+      if ( rmesa->state.stencil.hwBuffer ) {
+        R200_STATECHANGE( rmesa, ctx );
+        if ( state ) {
+           rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_STENCIL_ENABLE;
+        } else {
+           rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE;
+        }
+      } else {
+        FALLBACK( rmesa, R200_FALLBACK_STENCIL, state );
+      }
+      break;
+
+   case GL_TEXTURE_GEN_Q:
+   case GL_TEXTURE_GEN_R:
+   case GL_TEXTURE_GEN_S:
+   case GL_TEXTURE_GEN_T:
+      /* Picked up in r200UpdateTextureState.
+       */
+      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; 
+      break;
+
+   case GL_COLOR_SUM_EXT:
+      r200UpdateSpecular ( ctx );
+      break;
+
+   default:
+      return;
+   }
+}
+
+
+void r200LightingSpaceChange( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLboolean rescale;
+
+   if (R200_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+             rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
+
+   /* With eye coordinates in use the hardware bit follows GL's
+    * RescaleNormals setting; otherwise it takes the opposite value.
+    */
+   rescale = ctx->_NeedEyeCoords ? ctx->Transform.RescaleNormals
+                                 : !ctx->Transform.RescaleNormals;
+
+   R200_STATECHANGE( rmesa, tcl );
+   if ( rescale )
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_RESCALE_NORMALS;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
+
+   if (R200_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+             rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
+}
+
+/* =============================================================
+ * Deferred state management - matrices, textures, other?
+ */
+
+/* Write the 16 floats at src, transposed, into the float buffer returned by
+ * R200_DB_STATE( mat[idx] ) starting at MAT_ELT_0, then report the change via
+ * R200_DB_STATECHANGE.  src is only read, hence const as in upload_matrix_t. */
+static void upload_matrix( r200ContextPtr rmesa, const GLfloat *src, int idx )
+{
+   float *dest = ((float *)R200_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   int i;
+
+   /* Output group i is src[i], src[i+4], src[i+8], src[i+12]. */
+   for (i = 0 ; i < 4 ; i++) {
+      *dest++ = src[i];
+      *dest++ = src[i+4];
+      *dest++ = src[i+8];
+      *dest++ = src[i+12];
+   }
+
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+/* Copy the 16 floats at src, in their existing order, into mat[idx]'s buffer. */
+static void upload_matrix_t( r200ContextPtr rmesa, const GLfloat *src, int idx )
+{
+   float *out = ((float *)R200_DB_STATE( mat[idx] ));
+   memcpy(out + MAT_ELT_0, src, 16 * sizeof *out);
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+/* Recompute the texture-matrix enables for units 0 and 1, upload the matrices
+ * they need, and update the tcg / vtx state words if the results changed. */
+static void update_texturematrix( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   GLuint tpc = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0]; /* NOTE(review): reassigned below before any read */
+   GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL];
+   int unit;
+
+   if (R200_DEBUG & DEBUG_STATE) 
+      fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__,
+             rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]);
+
+   rmesa->TexMatEnabled = 0;
+   rmesa->TexMatCompSel = 0;
+   /* Only units 0 and 1 are visited; units with _ReallyEnabled == 0 are skipped. */
+   for (unit = 0 ; unit < 2; unit++) {
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled) 
+        continue;
+
+      if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
+        rmesa->TexMatEnabled |= (R200_TEXGEN_TEXMAT_0_ENABLE|
+                                 R200_TEXMAT_0_ENABLE) << unit;
+
+        rmesa->TexMatCompSel |= R200_OUTPUT_TEX_0 << unit;
+
+        if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
+           /* Need to preconcatenate any active texgen 
+            * obj/eyeplane matrices:
+            */
+           _math_matrix_mul_matrix( &rmesa->tmpmat, 
+                                    &rmesa->TexGenMatrix[unit],
+                                    ctx->TextureMatrixStack[unit].Top );
+           upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit );
+        } 
+        else {
+           upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, 
+                          R200_MTX_TEX0+unit );
+        }
+      }
+      else if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
+        upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, 
+                       R200_MTX_TEX0+unit );
+      }
+   }
+   /* Touch the tcg atom only when one of its two TEX_PROC_CTL words differs. */
+   tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
+   if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] ||
+       rmesa->TexGenInputs != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1]) {
+      R200_STATECHANGE(rmesa, tcg);
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = tpc;
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = rmesa->TexGenInputs;
+   }
+   /* Likewise rebuild the R200_OUTPUT_TEX_MASK bits of the vtx COMPSEL word. */
+   compsel &= ~R200_OUTPUT_TEX_MASK;
+   compsel |= rmesa->TexMatCompSel | rmesa->TexGenCompSel;
+   if (compsel != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]) {
+      R200_STATECHANGE(rmesa, vtx);
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = compsel;
+   }
+}
+/* Act on the GL state bits accumulated in rmesa->NewGLState: refresh texture
+ * state, upload the affected matrices, update lighting and user clip planes,
+ * then clear NewGLState. */
+void r200ValidateState( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint new_state = rmesa->NewGLState;
+
+   if (new_state & _NEW_TEXTURE) {
+      r200UpdateTextureState( ctx );
+      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+   }
+
+   /* Need an event driven matrix update?
+    */
+   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
+      upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, R200_MTX_MVP );
+
+   /* Need these for lighting (shouldn't upload otherwise)
+    */
+   if (new_state & (_NEW_MODELVIEW)) {
+      upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, R200_MTX_MV );
+      upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, R200_MTX_IMV );
+   }
+
+   /* Does this need to be triggered on eg. modelview for
+    * texgen-derived objplane/eyeplane matrices?
+    */
+   if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
+      update_texturematrix( ctx );
+   }      
+
+   if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
+      update_light( ctx );
+   }
+
+   /* emit all active clip planes if projection matrix changes.
+    */
+   if (new_state & (_NEW_PROJECTION)) {
+      if (ctx->Transform.ClipPlanesEnabled) 
+        r200UpdateClipPlanes( ctx );
+   }
+
+   /* Every flagged bit has been looked at; start accumulating afresh. */
+   rmesa->NewGLState = 0;
+}
+/* Installed as ctx->Driver.UpdateState: pass new_state on to the _swrast,
+ * _swsetup, _ac, _tnl and _ae invalidate hooks and OR it into NewGLState. */
+static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _ac_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _ae_invalidate_state( ctx, new_state );
+   R200_CONTEXT(ctx)->NewGLState |= new_state;
+   r200VtxfmtInvalidate( ctx );
+}
+/* Installed as the TNL RunPipeline hook: validate state, then run the pipeline. */
+static void r200WrapRunPipeline( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (0)
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
+
+   /* Validate state:
+    */
+   if (rmesa->NewGLState)
+      r200ValidateState( ctx );
+   /* While tnl->vb.Material is set, the MATERIAL TCL fallback wraps the run. */
+   if (tnl->vb.Material) {
+      TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_TRUE );
+   }
+
+   /* Run the pipeline.
+    */ 
+   _tnl_run_pipeline( ctx );
+
+   if (tnl->vb.Material) {
+      TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_FALSE );
+      r200UpdateMaterial( ctx ); /* not needed any more? */
+   }
+}
+
+
+/* Initialize the driver's state functions: fill in the ctx->Driver callbacks
+ * (several are explicitly set to NULL) and the two TNL driver hooks. */
+void r200InitStateFuncs( GLcontext *ctx )
+{
+   ctx->Driver.UpdateState             = r200InvalidateState;
+   ctx->Driver.LightingSpaceChange      = r200LightingSpaceChange;
+
+   ctx->Driver.DrawBuffer              = r200DrawBuffer;
+   ctx->Driver.ReadBuffer              = r200ReadBuffer;
+
+   ctx->Driver.AlphaFunc               = r200AlphaFunc;
+   ctx->Driver.BlendEquation           = r200BlendEquation;
+   ctx->Driver.BlendFunc               = r200BlendFunc;
+   ctx->Driver.BlendFuncSeparate       = r200BlendFuncSeparate;
+   ctx->Driver.ClearColor              = r200ClearColor;
+   ctx->Driver.ClearDepth              = NULL;
+   ctx->Driver.ClearIndex              = NULL;
+   ctx->Driver.ClearStencil            = r200ClearStencil;
+   ctx->Driver.ClipPlane               = r200ClipPlane;
+   ctx->Driver.ColorMask               = r200ColorMask;
+   ctx->Driver.CullFace                        = r200CullFace;
+   ctx->Driver.DepthFunc               = r200DepthFunc;
+   ctx->Driver.DepthMask               = r200DepthMask;
+   ctx->Driver.DepthRange              = r200DepthRange;
+   ctx->Driver.Enable                  = r200Enable;
+   ctx->Driver.Fogfv                   = r200Fogfv;
+   ctx->Driver.FrontFace               = r200FrontFace;
+   ctx->Driver.Hint                    = NULL;
+   ctx->Driver.IndexMask               = NULL;
+   ctx->Driver.LightModelfv            = r200LightModelfv;
+   ctx->Driver.Lightfv                 = r200Lightfv;
+   ctx->Driver.LineStipple              = r200LineStipple;
+   ctx->Driver.LineWidth                = r200LineWidth;
+   ctx->Driver.LogicOpcode             = r200LogicOpCode;
+   ctx->Driver.PolygonMode             = r200PolygonMode;
+   ctx->Driver.PolygonOffset           = r200PolygonOffset;
+   ctx->Driver.PolygonStipple          = r200PolygonStipple;
+   ctx->Driver.PointSize                = r200PointSize;
+   ctx->Driver.RenderMode              = r200RenderMode;
+   ctx->Driver.Scissor                 = r200Scissor;
+   ctx->Driver.ShadeModel              = r200ShadeModel;
+   ctx->Driver.StencilFunc             = r200StencilFunc;
+   ctx->Driver.StencilMask             = r200StencilMask;
+   ctx->Driver.StencilOp               = r200StencilOp;
+   ctx->Driver.Viewport                        = r200Viewport;
+
+   /* Swrast hooks for imaging extensions:
+    */
+   ctx->Driver.CopyColorTable          = _swrast_CopyColorTable;
+   ctx->Driver.CopyColorSubTable       = _swrast_CopyColorSubTable;
+   ctx->Driver.CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
+   ctx->Driver.CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
+   /* TNL module hooks: material-change notification and the pipeline wrapper. */
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = r200UpdateMaterial;
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = r200WrapRunPipeline;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h
new file mode 100644 (file)
index 0000000..fe2a7dc
--- /dev/null
@@ -0,0 +1,70 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_STATE_H__
+#define __R200_STATE_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "r200_context.h"
+/* Context hardware-state setup and ctx->Driver state-callback installation. */
+extern void r200InitState( r200ContextPtr rmesa );
+extern void r200InitStateFuncs( GLcontext *ctx );
+
+extern void r200UpdateMaterial( GLcontext *ctx );
+
+extern void r200SetCliprects( r200ContextPtr rmesa, GLenum mode );
+extern void r200RecalcScissorRects( r200ContextPtr rmesa );
+extern void r200UpdateViewportOffset( GLcontext *ctx );
+extern void r200UpdateWindow( GLcontext *ctx );
+
+extern void r200ValidateState( GLcontext *ctx );
+/* Debug helper: prints msg, then the name of every atom on the dirty list. */
+extern void r200PrintDirty( r200ContextPtr rmesa,
+                             const char *msg );
+
+
+extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( rmesa, bit, mode ) do {                              \
+   if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",               \
+                    __FUNCTION__, bit, mode );                         \
+   r200Fallback( rmesa->glCtx, bit, mode );                            \
+} while (0)
+
+extern void r200LightingSpaceChange( GLcontext *ctx );
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
new file mode 100644 (file)
index 0000000..ba445d5
--- /dev/null
@@ -0,0 +1,691 @@
+/* $XFree86$ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "enums.h"
+#include "colormac.h"
+#include "api_arrayelt.h"
+
+#include "swrast/swrast.h"
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_tcl.h"
+#include "r200_tex.h"
+#include "r200_swtcl.h"
+#include "r200_vtxfmt.h"
+
+/* =============================================================
+ * State initialization
+ */
+/* Print msg, then the name of every state atom on rmesa->hw.dirty, to stderr. */
+void r200PrintDirty( r200ContextPtr rmesa, const char *msg )
+{
+   struct r200_state_atom *l;
+
+   /* msg is data: print it through "%s", never as the format string itself. */
+   fprintf(stderr, "%s: ", msg);
+
+   foreach(l, &(rmesa->hw.dirty)) {
+      fprintf(stderr, "%s, ", l->name);
+   }
+
+   fprintf(stderr, "\n");
+}
+/* Return the header word for a RADEON_CMD_PACKET command with packet id `id'. */
+static int cmdpkt( int id )
+{
+   drmRadeonCmdHeader hdr;
+   hdr.i = 0;                        /* zero the integer view before filling */
+   hdr.packet.packet_id = id;
+   hdr.packet.cmd_type = RADEON_CMD_PACKET;
+   return hdr.i;
+}
+/* Return the header word for a RADEON_CMD_VECTORS command (offset/stride/count). */
+static int cmdvec( int offset, int stride, int count )
+{
+   drmRadeonCmdHeader hdr;
+   hdr.i = 0;                        /* zero the integer view before filling */
+   hdr.vectors.count = count;
+   hdr.vectors.stride = stride;
+   hdr.vectors.offset = offset;
+   hdr.vectors.cmd_type = RADEON_CMD_VECTORS;
+   return hdr.i;
+}
+/* Return the header word for a RADEON_CMD_SCALARS command (offset/stride/count). */
+static int cmdscl( int offset, int stride, int count )
+{
+   drmRadeonCmdHeader hdr;
+   hdr.i = 0;                        /* zero the integer view before filling */
+   hdr.scalars.count = count;
+   hdr.scalars.stride = stride;
+   hdr.scalars.offset = offset;
+   hdr.scalars.cmd_type = RADEON_CMD_SCALARS;
+   return hdr.i;
+}
+/* As cmdscl(), but RADEON_CMD_SCALARS2 with 0x100 subtracted from the offset. */
+static int cmdscl2( int offset, int stride, int count )
+{
+   drmRadeonCmdHeader hdr;
+   hdr.i = 0;                        /* zero the integer view before filling */
+   hdr.scalars.count = count;
+   hdr.scalars.stride = stride;
+   hdr.scalars.offset = offset - 0x100;
+   hdr.scalars.cmd_type = RADEON_CMD_SCALARS2;
+   return hdr.i;
+}
+/* CHECK( NM, FLAG ) defines check_NM( ctx, idx ), which simply returns FLAG. */
+#define CHECK( NM, FLAG )                              \
+static GLboolean check_##NM( GLcontext *ctx, int idx ) \
+{                                                      \
+   (void) idx;                                         \
+   return FLAG;                                                \
+}
+/* TCL_CHECK: like CHECK, but also false whenever rmesa->TclFallback is set. */
+#define TCL_CHECK( NM, FLAG )                          \
+static GLboolean check_##NM( GLcontext *ctx, int idx ) \
+{                                                      \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);           \
+   (void) idx;                                         \
+   return !rmesa->TclFallback && (FLAG);               \
+}
+
+
+/* Instantiate the check_* predicates that ALLOC_STATE stores in each atom. */
+CHECK( always, GL_TRUE )
+CHECK( tex_any, ctx->Texture._EnabledUnits )
+CHECK( tex, ctx->Texture.Unit[idx]._ReallyEnabled )
+CHECK( fog, ctx->Fog.Enabled )
+TCL_CHECK( tcl, GL_TRUE )
+TCL_CHECK( tcl_tex_any, ctx->Texture._EnabledUnits )
+TCL_CHECK( tcl_tex, ctx->Texture.Unit[idx]._ReallyEnabled )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
+TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled )
+TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled )
+TCL_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) )
+/* TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled )  */
+
+/* Hand-expanded TCL_CHECK for "eye coords or fog", with an optional trace. */
+static GLboolean check_tcl_eyespace_or_fog( GLcontext *ctx, int idx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   int res = !rmesa->TclFallback && (ctx->_NeedEyeCoords || ctx->Fog.Enabled);
+   (void) idx;
+   if (R200_DEBUG & DEBUG_STATE)   /* trace only when state debugging is on */
+      fprintf(stderr, "%s: %d\n", __FUNCTION__, res);
+   return res;
+}
+
+
+/* Initialize the context's hardware state.
+ */
+void r200InitState( r200ContextPtr rmesa )
+{
+   GLcontext *ctx = rmesa->glCtx;
+   GLuint color_fmt, depth_fmt, i;
+
+   switch ( rmesa->r200Screen->cpp ) {
+   case 2:
+      color_fmt = R200_COLOR_FORMAT_RGB565;
+      break;
+   case 4:
+      color_fmt = R200_COLOR_FORMAT_ARGB8888;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+      exit( -1 );
+   }
+
+   rmesa->state.color.clear = 0x00000000;
+
+   switch ( ctx->Visual.depthBits ) {
+   case 16:
+      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
+      depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
+      rmesa->state.stencil.clear = 0x00000000;
+      break;
+   case 24:
+      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
+      depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
+      rmesa->state.stencil.clear = 0xff000000;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+              ctx->Visual.depthBits );
+      exit( -1 );
+   }
+
+   /* Only have hw stencil when depth buffer is 24 bits deep */
+   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
+                                    ctx->Visual.depthBits == 24 );
+
+   rmesa->Fallback = 0;
+
+   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+      rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
+      rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
+   } else {
+      rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
+      rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
+   }
+
+   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
+   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
+
+   /* Initialize lists:
+    */
+   make_empty_list(&(rmesa->hw.dirty)); rmesa->hw.dirty.name = "DIRTY";
+   make_empty_list(&(rmesa->hw.clean)); rmesa->hw.clean.name = "CLEAN";
+
+
+#define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )                          \
+   do {                                                                \
+      rmesa->hw.ATOM.cmd_size = SZ;                            \
+      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));    \
+      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));        \
+      rmesa->hw.ATOM.name = NM;                                        \
+      rmesa->hw.ATOM.idx = IDX;                                        \
+      rmesa->hw.ATOM.check = check_##CHK;                              \
+      insert_at_head(&(rmesa->hw.dirty), &(rmesa->hw.ATOM));   \
+   } while (0)
+      
+      
+   /* Allocate state buffers:
+    */
+   ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
+   ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
+   ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+   ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+   ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
+   ALLOC_STATE( vtx, always, VTX_STATE_SIZE, "VTX/vertex", 0 );
+   ALLOC_STATE( vap, always, VAP_STATE_SIZE, "VAP/vap", 0 );
+   ALLOC_STATE( vte, always, VTE_STATE_SIZE, "VTE/vte", 0 );
+   ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
+   ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
+   ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
+   ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+   ALLOC_STATE( tf, tex_any, TF_STATE_SIZE, "TF/tfactor", 0 );
+   ALLOC_STATE( tex[0], tex_any, TEX_STATE_SIZE, "TEX/tex-0", 0 );
+   ALLOC_STATE( tex[1], tex_any, TEX_STATE_SIZE, "TEX/tex-1", 1 );
+   ALLOC_STATE( cube[0], tex_any, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
+   ALLOC_STATE( cube[1], tex_any, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
+
+   ALLOC_STATE( tcl, tcl, TCL_STATE_SIZE, "TCL/tcl", 0 );
+   ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 );
+   ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 );
+   ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 );
+   ALLOC_STATE( grd, tcl, GRD_STATE_SIZE, "GRD/guard-band", 0 );
+   ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 0 );
+   ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 );
+   ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
+   ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 );
+   ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
+   ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 );
+   ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 );
+   ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+   ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
+   ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+   ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
+   ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
+   ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
+   ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
+   ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 );
+   ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 );
+   ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 );
+   ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 );
+   ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 );
+   ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
+   ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
+   ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
+   ALLOC_STATE( pix[0], always, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
+   ALLOC_STATE( pix[1], tex, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
+
+
+   /* Fill in the packet headers:
+    */
+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
+   rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X);
+   rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET);
+   rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL);
+   rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0);
+   rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS);
+   rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
+   rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
+   rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
+   rmesa->hw.tf.cmd[TF_CMD_0]   = cmdpkt(R200_EMIT_TFACTOR_0);
+   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
+   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
+   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1);
+   rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0);
+   rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1);
+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
+   rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
+   rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2);
+   rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0);
+   rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL);
+   rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0);
+   rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL);
+   rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL);
+   rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL);
+   rmesa->hw.mtl[0].cmd[MTL_CMD_0] = 
+      cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
+   rmesa->hw.mtl[0].cmd[MTL_CMD_1] = 
+      cmdscl2( R200_SS_MAT_0_SHININESS, 1, 1 );
+   rmesa->hw.grd.cmd[GRD_CMD_0] = 
+      cmdscl( R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
+   rmesa->hw.fog.cmd[FOG_CMD_0] = 
+      cmdvec( R200_VS_FOG_PARAM_ADDR, 1, 4 );
+   rmesa->hw.glt.cmd[GLT_CMD_0] = 
+      cmdvec( R200_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
+   rmesa->hw.eye.cmd[EYE_CMD_0] = 
+      cmdvec( R200_VS_EYE_VECTOR_ADDR, 1, 4 );
+
+   rmesa->hw.mat[R200_MTX_MV].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_0_MV, 1, 16);
+   rmesa->hw.mat[R200_MTX_IMV].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_1_INV_MV, 1, 16);
+   rmesa->hw.mat[R200_MTX_MVP].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_2_MVP, 1, 16);
+   rmesa->hw.mat[R200_MTX_TEX0].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_3_TEX0, 1, 16);
+   rmesa->hw.mat[R200_MTX_TEX1].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_4_TEX1, 1, 16);
+
+   for (i = 0 ; i < 8; i++) {
+      rmesa->hw.lit[i].cmd[LIT_CMD_0] = 
+        cmdvec( R200_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
+      rmesa->hw.lit[i].cmd[LIT_CMD_1] = 
+        cmdscl( R200_SS_LIGHT_DCD_ADDR + i, 8, 7 );
+   }
+
+   for (i = 0 ; i < 6; i++) {
+      rmesa->hw.ucp[i].cmd[UCP_CMD_0] = 
+        cmdvec( R200_VS_UCP_ADDR + i, 1, 4 );
+   }
+
+   /* Initial hardware state:
+    */
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = (R200_ALPHA_TEST_PASS
+                                    /* | R200_RIGHT_HAND_CUBE_OGL*/);
+
+   rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (R200_FOG_VERTEX |
+                                         R200_FOG_USE_SPEC_ALPHA);
+
+   rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
+                                           R200_SRC_BLEND_GL_ONE |
+                                           R200_DST_BLEND_GL_ZERO );
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+      rmesa->r200Screen->depthOffset;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
+      ((rmesa->r200Screen->depthPitch &
+       R200_DEPTHPITCH_MASK) |
+       R200_DEPTH_ENDIAN_NO_SWAP);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
+                                              R200_Z_TEST_LESS |  
+                                              R200_STENCIL_TEST_ALWAYS |
+                                              R200_STENCIL_FAIL_KEEP |
+                                              R200_STENCIL_ZPASS_KEEP |
+                                              R200_STENCIL_ZFAIL_KEEP |
+                                              R200_Z_WRITE_ENABLE);
+
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE 
+                                    | R200_TEX_BLEND_0_ENABLE);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt;
+   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_DITHER_ENABLE;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = (rmesa->state.color.drawOffset &
+                                             R200_COLOROFFSET_MASK);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
+                                             R200_COLORPITCH_MASK) |
+                                            R200_COLOR_ENDIAN_NO_SWAP);
+
+   rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
+                                    R200_BFACE_SOLID |
+                                    R200_FFACE_SOLID |
+                                    R200_FLAT_SHADE_VTX_LAST |
+                                    R200_DIFFUSE_SHADE_GOURAUD |
+                                    R200_ALPHA_SHADE_GOURAUD |
+                                    R200_SPECULAR_SHADE_GOURAUD |
+                                    R200_FOG_SHADE_GOURAUD |
+                                    R200_VTX_PIX_CENTER_OGL |
+                                    R200_ROUND_MODE_TRUNC |
+                                    R200_ROUND_PREC_8TH_PIX);
+
+   rmesa->hw.set.cmd[SET_RE_CNTL] = (R200_PERSPECTIVE_ENABLE |
+                                    R200_SCISSOR_ENABLE);
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] = 
+      ((0 << R200_LINE_CURRENT_PTR_SHIFT) |
+       (1 << R200_LINE_CURRENT_COUNT_SHIFT));
+
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
+
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = 
+      ((0x00 << R200_STENCIL_REF_SHIFT) |
+       (0xff << R200_STENCIL_MASK_SHIFT) |
+       (0xff << R200_STENCIL_WRITEMASK_SHIFT));
+
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = R200_ROP_COPY;
+   rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
+
+   rmesa->hw.tam.cmd[TAM_DEBUG3] = 0;
+
+   rmesa->hw.msc.cmd[MSC_RE_MISC] = 
+      ((0 << R200_STIPPLE_X_OFFSET_SHIFT) |
+       (0 << R200_STIPPLE_Y_OFFSET_SHIFT) |
+       R200_STIPPLE_BIG_BIT_ORDER);
+
+
+   rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+   rmesa->hw.cst.cmd[CST_RB3D_DEPTHXY_OFFSET] = 0;
+   rmesa->hw.cst.cmd[CST_RE_AUX_SCISSOR_CNTL] = 0x0;
+   rmesa->hw.cst.cmd[CST_RE_SCISSOR_TL_0] = 0;
+   rmesa->hw.cst.cmd[CST_RE_SCISSOR_BR_0] = 0;
+   rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] =
+#ifdef MESA_BIG_ENDIAN
+                                               R200_VC_32BIT_SWAP;
+#else
+                                               R200_VC_NO_SWAP;
+#endif
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] = 0x100010;
+   rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_0] =
+      (0x0 << R200_VERTEX_POSITION_ADDR__SHIFT);
+   rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_1] =
+      (0x02 << R200_VTX_COLOR_0_ADDR__SHIFT) |
+      (0x03 << R200_VTX_COLOR_1_ADDR__SHIFT);
+   rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_2] =
+      (0x06 << R200_VTX_TEX_0_ADDR__SHIFT) |
+      (0x07 << R200_VTX_TEX_1_ADDR__SHIFT) |
+      (0x08 << R200_VTX_TEX_2_ADDR__SHIFT) |
+      (0x09 << R200_VTX_TEX_3_ADDR__SHIFT);
+   rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_3] =
+      (0x0A << R200_VTX_TEX_4_ADDR__SHIFT) |
+      (0x0B << R200_VTX_TEX_5_ADDR__SHIFT);
+  
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
+
+   for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = R200_BORDER_MODE_OGL;
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] = 
+         ((i << R200_TXFORMAT_ST_ROUTE_SHIFT) |  /* <-- note i */
+          (2 << R200_TXFORMAT_WIDTH_SHIFT) |
+          (2 << R200_TXFORMAT_HEIGHT_SHIFT));
+      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] = 0;
+      rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
+         (/* R200_TEXCOORD_PROJ | */
+          0x100000);   /* Small default bias */
+
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] = 0;
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] = 0;
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] = 0;
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] = 0;
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] = 0;
+   }
+
+   rmesa->hw.pix[0].cmd[PIX_PP_TXCBLEND] =  
+      (R200_TXC_ARG_A_ZERO |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_DIFFUSE_COLOR |
+       R200_TXC_OP_MADD);
+
+   rmesa->hw.pix[0].cmd[PIX_PP_TXCBLEND2] =  
+      ((0 << R200_TXC_TFACTOR_SEL_SHIFT) | 
+       R200_TXC_SCALE_1X |
+       R200_TXC_CLAMP_0_1 |
+       R200_TXC_OUTPUT_REG_R0);
+
+   rmesa->hw.pix[0].cmd[PIX_PP_TXABLEND] =  
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_DIFFUSE_ALPHA |
+       R200_TXA_OP_MADD);
+
+   rmesa->hw.pix[0].cmd[PIX_PP_TXABLEND2] =  
+      ((0 << R200_TXA_TFACTOR_SEL_SHIFT) | 
+       R200_TXA_SCALE_1X |
+       R200_TXA_CLAMP_0_1 |
+       R200_TXA_OUTPUT_REG_R0);
+
+   rmesa->hw.pix[1].cmd[PIX_PP_TXCBLEND] =  
+      (R200_TXC_ARG_A_ZERO |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_DIFFUSE_COLOR |
+       R200_TXC_OP_MADD);
+
+   rmesa->hw.pix[1].cmd[PIX_PP_TXCBLEND2] =  
+      ((0 << R200_TXC_TFACTOR_SEL_SHIFT) | 
+       R200_TXC_SCALE_1X |
+       R200_TXC_CLAMP_0_1 |
+       R200_TXC_OUTPUT_REG_R0);
+
+   rmesa->hw.pix[1].cmd[PIX_PP_TXABLEND] =  
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_DIFFUSE_ALPHA |
+       R200_TXA_OP_MADD);
+
+   rmesa->hw.pix[1].cmd[PIX_PP_TXABLEND2] =  
+      ((0 << R200_TXA_TFACTOR_SEL_SHIFT) | 
+       R200_TXA_SCALE_1X |
+       R200_TXA_CLAMP_0_1 |
+       R200_TXA_OUTPUT_REG_R0);
+
+   rmesa->hw.tf.cmd[TF_TFACTOR_0] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_1] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_2] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_3] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_4] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_5] = 0;
+
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = 
+      (R200_VAP_TCL_ENABLE | 
+       (0x9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT));
+
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = 
+      (R200_VPORT_X_SCALE_ENA |
+       R200_VPORT_Y_SCALE_ENA |
+       R200_VPORT_Z_SCALE_ENA |
+       R200_VPORT_X_OFFSET_ENA |
+       R200_VPORT_Y_OFFSET_ENA |
+       R200_VPORT_Z_OFFSET_ENA |
+/* FIXME: Turn on for tex rect only */
+       R200_VTX_ST_DENORMALIZED |  
+       R200_VTX_W0_FMT); 
+
+
+   rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = 0;
+   rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = 0;
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = 
+      ((R200_VTX_Z0 | R200_VTX_W0 |
+       (R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT))); 
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] = 0;
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = (R200_OUTPUT_XYZW);
+   rmesa->hw.vtx.cmd[VTX_STATE_CNTL] = R200_VSC_UPDATE_USER_COLOR_0_ENABLE;
+                                                  
+
+   /* Matrix selection */
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_0] = 
+      (R200_MTX_MV << R200_MODELVIEW_0_SHIFT);
+   
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_1] = 
+       (R200_MTX_IMV << R200_IT_MODELVIEW_0_SHIFT);
+
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_2] = 
+      (R200_MTX_MVP << R200_MODELPROJECT_0_SHIFT);
+
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_3] = 
+      ((R200_MTX_TEX0 << R200_TEXMAT_0_SHIFT) |
+       (R200_MTX_TEX1 << R200_TEXMAT_1_SHIFT) |
+       (R200_MTX_TEX2 << R200_TEXMAT_2_SHIFT) |
+       (R200_MTX_TEX3 << R200_TEXMAT_3_SHIFT));
+
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_4] = 
+      ((R200_MTX_TEX4 << R200_TEXMAT_4_SHIFT) |
+       (R200_MTX_TEX5 << R200_TEXMAT_5_SHIFT));
+
+
+   /* General TCL state */
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = 
+      (R200_SPECULAR_LIGHTS |
+       R200_DIFFUSE_SPECULAR_COMBINE |
+       R200_LOCAL_LIGHT_VEC_GL);
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = 
+      ((R200_LM1_SOURCE_LIGHT_PREMULT << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_LIGHT_PREMULT << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_LIGHT_PREMULT << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_LIGHT_PREMULT << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_LIGHT_PREMULT << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_LIGHT_PREMULT << R200_BACK_AMBIENT_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_LIGHT_PREMULT << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_LIGHT_PREMULT << R200_BACK_SPECULAR_SOURCE_SHIFT)); 
+
+   rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_0] = 0; /* filled in via callbacks */
+   rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_1] = 0;
+   rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_2] = 0;
+   rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_3] = 0;
+   
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = 
+      (R200_UCP_IN_CLIP_SPACE |
+       R200_CULL_FRONT_IS_CCW);
+
+   /* Texgen/Texmat state */
+   rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = 0x0; /* masks??? */
+   rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_3] = 
+      ((0 << R200_TEXGEN_0_INPUT_TEX_SHIFT) |
+       (1 << R200_TEXGEN_1_INPUT_TEX_SHIFT) |
+       (2 << R200_TEXGEN_2_INPUT_TEX_SHIFT) |
+       (3 << R200_TEXGEN_3_INPUT_TEX_SHIFT) |
+       (4 << R200_TEXGEN_4_INPUT_TEX_SHIFT) |
+       (5 << R200_TEXGEN_5_INPUT_TEX_SHIFT)); 
+   rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = 0; 
+   rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] =  
+      ((0 << R200_TEXGEN_0_INPUT_SHIFT) |
+       (1 << R200_TEXGEN_1_INPUT_SHIFT) |
+       (2 << R200_TEXGEN_2_INPUT_SHIFT) |
+       (3 << R200_TEXGEN_3_INPUT_SHIFT) |
+       (4 << R200_TEXGEN_4_INPUT_SHIFT) |
+       (5 << R200_TEXGEN_5_INPUT_SHIFT)); 
+   rmesa->hw.tcg.cmd[TCG_TEX_CYL_WRAP_CTL] = 0;
+
+   rmesa->TexGenInputs = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1];
+
+
+   for (i = 0 ; i < 8; i++) {
+      struct gl_light *l = &ctx->Light.Light[i];
+      GLenum p = GL_LIGHT0 + i;
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
+
+      ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
+      ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
+      ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
+      ctx->Driver.Lightfv( ctx, p, GL_POSITION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
+      ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
+                          &l->ConstantAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, 
+                          &l->LinearAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, 
+                    &l->QuadraticAttenuation );
+   }
+
+   ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, 
+                            ctx->Light.Model.Ambient );
+
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
+
+   for (i = 0 ; i < 6; i++) {
+      ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
+   }
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, 0 );
+   
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
+
+   rmesa->hw.eye.cmd[EYE_X] = 0;
+   rmesa->hw.eye.cmd[EYE_Y] = 0;
+   rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
+   rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+
+   r200LightingSpaceChange( ctx );
+   
+   rmesa->lost_context = 1;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
new file mode 100644 (file)
index 0000000..6a43650
--- /dev/null
@@ -0,0 +1,1291 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "enums.h"
+#include "image.h"
+#include "imports.h"
+#include "macros.h"
+
+#include "swrast/s_context.h"
+#include "swrast/s_fog.h"
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_imm_exec.h"
+#include "tnl/t_pipeline.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_swtcl.h"
+#include "r200_tcl.h"
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+
+/* Attribute bits for software-TCL vertices.  A combination of these bits
+ * is used as the index into setup_tab[] (see r200ChooseVertexState() and
+ * r200SetVertexFormat()), so R200_MAX_SETUP, the table size, is one more
+ * than all six bits OR-ed together (0x3f).
+ */
+#define R200_XYZW_BIT          0x01
+#define R200_RGBA_BIT          0x02
+#define R200_SPEC_BIT          0x04
+#define R200_TEX0_BIT          0x08
+#define R200_TEX1_BIT          0x10
+#define R200_PTEX_BIT          0x20
+#define R200_MAX_SETUP 0x40
+
+/* Defined further down; declared here because r200RenderStart() compares
+ * rmesa->dma.flush against it.
+ */
+static void flush_last_swtcl_prim( r200ContextPtr rmesa  );
+
+/* One entry per attribute-bit combination.
+ *
+ *   emit                - called as emit( ctx, start, count, dest, stride )
+ *                         to write vertices [start, count) to dest.
+ *   interp, copy_pv     - installed as tnl->Driver.Render.Interp / CopyPV
+ *                         by r200SetVertexFormat().
+ *   check_tex_sizes     - when it returns false, r200RenderStart() switches
+ *                         to the same setup with R200_PTEX_BIT added.
+ *   vertex_size         - vertex size; callers multiply it by 4 to get bytes.
+ *   vertex_stride_shift - log2 of the byte stride between stored vertices.
+ *   vertex_format       - index into se_vtx_fmt_0[] / se_vtx_fmt_1[].
+ *
+ * NOTE(review): entries are presumably filled in by the init_*() functions
+ * generated from tnl_dd/t_dd_vbtmp.h -- confirm against that template.
+ */
+static struct {
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );
+   interp_func         interp;
+   copy_pv_func                copy_pv;
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx );
+   GLuint               vertex_size;
+   GLuint               vertex_stride_shift;
+   GLuint               vertex_format;
+} setup_tab[R200_MAX_SETUP];
+
+
+/* Value written to the VTX_VTXFMT_0 word of the vtx state atom for each
+ * vertex format.  Indexed by the *_VERTEX_FORMAT numbers defined below
+ * (see r200SetVertexFormat()).  Entries 2..5 are identical: only the tiny
+ * format omits W0 and the second packed colour.
+ */
+static int se_vtx_fmt_0[] = {
+   /* 0: no format selected */
+   0,
+
+   /* 1: TINY_VERTEX_FORMAT */
+   (R200_VTX_XY |
+    R200_VTX_Z0 |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT)),
+
+   /* 2: NOTEX_VERTEX_FORMAT */
+   (R200_VTX_XY |
+    R200_VTX_Z0 |
+    R200_VTX_W0 |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT)),
+
+   /* 3: TEX0_VERTEX_FORMAT */
+   (R200_VTX_XY |
+    R200_VTX_Z0 |
+    R200_VTX_W0 |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT)),
+
+   /* 4: TEX1_VERTEX_FORMAT */
+   (R200_VTX_XY |
+    R200_VTX_Z0 |
+    R200_VTX_W0 |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT)),
+
+   /* 5: PROJ_TEX1_VERTEX_FORMAT */
+   (R200_VTX_XY |
+    R200_VTX_Z0 |
+    R200_VTX_W0 |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) |
+    (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT))
+};
+
+/* Value written to the VTX_VTXFMT_1 word of the vtx state atom for each
+ * vertex format; same indexing as se_vtx_fmt_0[].  Only the textured
+ * formats are non-zero: they set the component count of texture units 0
+ * and 1 (2 for the plain formats, 3 for the projective one).
+ */
+static int se_vtx_fmt_1[] = {
+   0,   /* 0: no format selected */
+   0,   /* 1: TINY_VERTEX_FORMAT */
+   0,   /* 2: NOTEX_VERTEX_FORMAT */
+   /* 3: TEX0_VERTEX_FORMAT */
+   ((2 << R200_VTX_TEX0_COMP_CNT_SHIFT)),
+   /* 4: TEX1_VERTEX_FORMAT */
+   ((2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
+    (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)),
+   /* 5: PROJ_TEX1_VERTEX_FORMAT */
+   ((3 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
+    (3 << R200_VTX_TEX1_COMP_CNT_SHIFT)),
+};
+
+/* Vertex format numbers.  Formats 1..5 index se_vtx_fmt_0[] and
+ * se_vtx_fmt_1[]; the formats this driver does not provide (see the
+ * HAVE_TEX2/TEX3_VERTICES settings below) are defined as 0.
+ */
+#define TINY_VERTEX_FORMAT     1
+#define NOTEX_VERTEX_FORMAT    2
+#define TEX0_VERTEX_FORMAT     3
+#define TEX1_VERTEX_FORMAT     4
+#define PROJ_TEX1_VERTEX_FORMAT        5
+#define TEX2_VERTEX_FORMAT 0
+#define TEX3_VERTEX_FORMAT 0
+#define PROJ_TEX3_VERTEX_FORMAT 0
+
+/* Attribute selectors, evaluated against the IND value that is #defined
+ * before each template include below.  Fog and specular share one bit, and
+ * texture units 2 and 3 are never emitted.
+ */
+#define DO_XYZW (IND & R200_XYZW_BIT)
+#define DO_RGBA (IND & R200_RGBA_BIT)
+#define DO_SPEC (IND & R200_SPEC_BIT)
+#define DO_FOG  (IND & R200_SPEC_BIT)
+#define DO_TEX0 (IND & R200_TEX0_BIT)
+#define DO_TEX1 (IND & R200_TEX1_BIT)
+#define DO_TEX2 0
+#define DO_TEX3 0
+#define DO_PTEX (IND & R200_PTEX_BIT)
+
+/* Driver types and accessors.
+ * NOTE(review): presumably these are the hooks read by the tnl_dd
+ * templates included below -- confirm against t_dd_vb.c / t_dd_vbtmp.h.
+ */
+#define VERTEX r200Vertex
+#define VERTEX_COLOR r200_color_t
+#define GET_VIEWPORT_MAT() 0
+#define GET_TEXSOURCE(n)  n
+#define GET_VERTEX_FORMAT() R200_CONTEXT(ctx)->swtcl.vertex_format
+#define GET_VERTEX_STORE() R200_CONTEXT(ctx)->swtcl.verts
+#define GET_VERTEX_STRIDE_SHIFT() R200_CONTEXT(ctx)->swtcl.vertex_stride_shift
+#define GET_UBYTE_COLOR_STORE() &R200_CONTEXT(ctx)->UbyteColor
+#define GET_UBYTE_SPEC_COLOR_STORE() &R200_CONTEXT(ctx)->UbyteSecondaryColor
+
+/* Capability switches.  HAVE_HW_DIVIDE is non-zero for every setup that
+ * carries more than position and colour.
+ */
+#define HAVE_HW_VIEWPORT    1
+#define HAVE_HW_DIVIDE      (IND & ~(R200_XYZW_BIT|R200_RGBA_BIT))
+#define HAVE_TINY_VERTICES  1
+#define HAVE_RGBA_COLOR     1
+#define HAVE_NOTEX_VERTICES 1
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  1
+
+/* True unless two-sided lighting or unfilled triangles are active; this is
+ * the same _TriangleCaps test r200SetVertexFormat() uses.
+ */
+#define CHECK_HW_DIVIDE    (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE| \
+                                                    DD_TRI_UNFILLED)))
+
+#define IMPORT_QUALIFIER
+#define IMPORT_FLOAT_COLORS r200_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS r200_import_float_spec_colors
+
+/* Interp / copy-pv functions of whichever setup is currently selected. */
+#define INTERP_VERTEX setup_tab[R200_CONTEXT(ctx)->swtcl.SetupIndex].interp
+#define COPY_PV_VERTEX setup_tab[R200_CONTEXT(ctx)->swtcl.SetupIndex].copy_pv
+
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+/* Instantiated once with every IND bit set; generated names get the
+ * r200_ prefix (e.g. r200_interp_extras, r200_copy_pv_extras used below).
+ */
+#define TAG(x) r200_##x
+#define IND ~0
+#include "tnl_dd/t_dd_vb.c"
+#undef IND
+
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+
+/* One instantiation per supported attribute combination.  The TAG suffix
+ * spells out the IND bits: w = XYZW, g = RGBA, fs = SPEC (fog/specular),
+ * t0 / t1 = TEX0 / TEX1, p = PTEX (projective texture).
+ *
+ * NOTE(review): IND and TAG are redefined before each include without an
+ * #undef here; presumably t_dd_vbtmp.h undefines both at its end -- confirm.
+ */
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_TEX0_BIT|R200_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_TEX0_BIT|R200_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_TEX0_BIT|R200_TEX1_BIT|\
+             R200_PTEX_BIT)
+#define TAG(x) x##_wgpt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_SPEC_BIT|\
+            R200_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_SPEC_BIT|\
+            R200_TEX0_BIT|R200_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_SPEC_BIT|\
+            R200_TEX0_BIT|R200_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (R200_XYZW_BIT|R200_RGBA_BIT|R200_SPEC_BIT|\
+            R200_TEX0_BIT|R200_TEX1_BIT|R200_PTEX_BIT)
+#define TAG(x) x##_wgfspt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+/* Run the init function of every template instantiation above, one call
+ * per supported attribute combination.
+ *
+ * NOTE(review): each init_<suffix>() is generated by tnl_dd/t_dd_vbtmp.h
+ * and presumably fills in its own setup_tab[] slot -- confirm there.
+ */
+static void init_setup_tab( void )
+{
+   init_wg();
+   init_wgt0();
+   init_wgpt0();
+   init_wgt0t1();
+   init_wgpt0t1();
+   init_wgfs();
+   init_wgfst0();
+   init_wgfspt0();
+   init_wgfst0t1();
+   init_wgfspt0t1();
+}
+
+
+
+/* Debug helper: print 'msg', the raw flag word in hex, and a readable name
+ * for every setup bit that is set in 'flags', on a single stderr line.
+ */
+void r200PrintSetupFlags(char *msg, GLuint flags )
+{
+   const char *xyzw = "";
+   const char *rgba = "";
+   const char *spec = "";
+   const char *tex0 = "";
+   const char *tex1 = "";
+   const char *ptex = "";
+
+   if (flags & R200_XYZW_BIT) xyzw = " xyzw,";
+   if (flags & R200_RGBA_BIT) rgba = " rgba,";
+   if (flags & R200_SPEC_BIT) spec = " spec/fog,";
+   if (flags & R200_TEX0_BIT) tex0 = " tex-0,";
+   if (flags & R200_TEX1_BIT) tex1 = " tex-1,";
+   if (flags & R200_PTEX_BIT) ptex = " proj-tex,";
+
+   fprintf(stderr, "%s(%x): %s%s%s%s%s%s\n",
+          msg, (int)flags, xyzw, rgba, spec, tex0, tex1, ptex);
+}
+
+
+
+/* Make setup_tab[ind] the active software-TCL vertex setup.
+ *
+ * Records 'ind' as the current setup index, installs the matching interp
+ * and copy-pv callbacks in the tnl module, reprograms the hardware vertex
+ * format words if the format number changed, and finally selects between
+ * tnl-projected coordinates and the W0 vertex path.
+ */
+static void r200SetVertexFormat( GLcontext *ctx, GLuint ind ) 
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   rmesa->swtcl.SetupIndex = ind;
+
+   /* With two-sided lighting or unfilled triangles the generic "_extras"
+    * variants are used instead of the per-setup functions.
+    */
+   if (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)) {
+      tnl->Driver.Render.Interp = r200_interp_extras;
+      tnl->Driver.Render.CopyPV = r200_copy_pv_extras;
+   }
+   else {
+      tnl->Driver.Render.Interp = setup_tab[ind].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[ind].copy_pv;
+   }
+
+   if (setup_tab[ind].vertex_format != rmesa->swtcl.vertex_format) {
+      int i;
+      /* NOTE(review): R200_NEWPRIM presumably flushes vertices queued with
+       * the old layout; it has to run before the size fields change.
+       */
+      R200_NEWPRIM(rmesa);
+      i = rmesa->swtcl.vertex_format = setup_tab[ind].vertex_format;
+      rmesa->swtcl.vertex_size = setup_tab[ind].vertex_size;
+      rmesa->swtcl.vertex_stride_shift = setup_tab[ind].vertex_stride_shift;
+
+      /* Flag the vtx atom before writing the new format words into it. */
+      R200_STATECHANGE( rmesa, vtx );
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = se_vtx_fmt_0[i];
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = se_vtx_fmt_1[i];
+   }
+
+   {
+      /* Work on copies so each atom is flagged only if its value changes. */
+      GLuint vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
+      GLuint vap = rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL];
+      GLuint needproj;
+
+      /* HW perspective divide is a win, but tiny vertex formats are a
+       * bigger one.
+       */
+      if (setup_tab[ind].vertex_format == TINY_VERTEX_FORMAT ||
+         (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+        /* tnl supplies projected coordinates; W is forced to one. */
+        needproj = GL_TRUE;
+        vte |= R200_VTX_XY_FMT | R200_VTX_Z_FMT;
+        vte &= ~R200_VTX_W0_FMT;
+        vap |= R200_VAP_FORCE_W_TO_ONE;
+      }
+      else {
+        /* Vertices carry W0 and tnl is told not to project. */
+        needproj = GL_FALSE;
+        vte &= ~(R200_VTX_XY_FMT | R200_VTX_Z_FMT);
+        vte |= R200_VTX_W0_FMT;
+        vap &= ~R200_VAP_FORCE_W_TO_ONE;
+      }
+
+      _tnl_need_projected_coords( ctx, needproj );
+      if (vte != rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]) {
+        R200_STATECHANGE( rmesa, vte );
+        rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = vte;
+      }
+      if (vap != rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL]) {
+        R200_STATECHANGE( rmesa, vap );
+        rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = vap;
+      }
+   }
+}
+
+/* Called before rendering starts.  If the active setup's texture size
+ * check fails, switch to the same setup with R200_PTEX_BIT added; then run
+ * any pending dma flush callback other than our own vertex flush.
+ */
+static void r200RenderStart( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   GLboolean sizes_ok =
+      setup_tab[rmesa->swtcl.SetupIndex].check_tex_sizes( ctx );
+
+   if (!sizes_ok)
+      r200SetVertexFormat( ctx, rmesa->swtcl.SetupIndex | R200_PTEX_BIT );
+
+   /* Nothing to do when no flush is pending or the pending one is ours. */
+   if (rmesa->dma.flush == 0 ||
+       rmesa->dma.flush == flush_last_swtcl_prim)
+      return;
+
+   rmesa->dma.flush( rmesa );
+}
+
+
+/* (Re)build vertices [start, count) in the software vertex store.
+ *
+ * 'newinputs' is merged with the inputs remembered in
+ * swtcl.SetupNewInputs, which is cleared on every call.  When the merged
+ * set is empty nothing is emitted; otherwise the active setup's emit
+ * function writes the vertices at their slot in swtcl.verts.
+ */
+void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+                          GLuint newinputs )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   GLuint pending = newinputs | rmesa->swtcl.SetupNewInputs;
+
+   rmesa->swtcl.SetupNewInputs = 0;
+
+   if (pending) {
+      GLuint stride = 1 << rmesa->swtcl.vertex_stride_shift;
+      GLubyte *first = ((GLubyte *)rmesa->swtcl.verts +
+                        (start << rmesa->swtcl.vertex_stride_shift));
+
+      setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count,
+                                               first, stride );
+   }
+}
+
+
+/* Derive the vertex setup index from current GL state and install it via
+ * r200SetVertexFormat().  Returns without doing anything unless
+ * rmesa->TclFallback is set and rmesa->Fallback is clear.
+ */
+void r200ChooseVertexState( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   /* Position and colour are always part of the vertex. */
+   GLuint ind = (R200_XYZW_BIT | R200_RGBA_BIT);
+
+   if (!rmesa->TclFallback || rmesa->Fallback)
+      return;
+
+   /* Fog and separate specular share the same setup bit. */
+   if (ctx->Fog.Enabled || (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
+      ind |= R200_SPEC_BIT;
+
+   /* Unit 1 in use implies emitting both texture coordinate sets. */
+   if (ctx->Texture._EnabledUnits & 0x2)  /* unit 1 enabled */
+      ind |= R200_TEX0_BIT|R200_TEX1_BIT;
+   else if (ctx->Texture._EnabledUnits & 0x1)  /* unit 0 enabled */
+      ind |= R200_TEX0_BIT;
+
+   r200SetVertexFormat( ctx, ind );
+}
+
+
+/* Flush vertices in the current dma region.
+ *
+ * Clears rmesa->dma.flush, then -- if a current dma buffer exists -- emits
+ * the accumulated vertices as one non-indexed primitive, resets the vertex
+ * count and moves the region start up to the write pointer.
+ */
+static void flush_last_swtcl_prim( r200ContextPtr rmesa  )
+{
+   struct r200_dma_region *region = &rmesa->dma.current;
+   GLuint hw_offset;
+
+   if (R200_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   rmesa->dma.flush = 0;
+
+   if (!region->buf)
+      return;
+
+   /* Card-visible address of the first unflushed vertex. */
+   hw_offset = (rmesa->r200Screen->agp_buffer_offset +
+                region->buf->buf->idx * RADEON_BUFFER_SIZE +
+                region->start);
+
+   /* Only non-indexed primitives are accumulated through this path. */
+   assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND));
+
+   /* The bytes written must match the vertex count exactly. */
+   assert (region->start +
+           rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+           region->ptr);
+
+   if (region->start != region->ptr) {
+      r200EmitVertexAOS( rmesa,
+                         rmesa->swtcl.vertex_size,
+                         hw_offset );
+
+      r200EmitVbufPrim( rmesa,
+                        rmesa->swtcl.hw_primitive,
+                        rmesa->swtcl.numverts );
+   }
+
+   rmesa->swtcl.numverts = 0;
+   region->start = region->ptr;
+}
+
+
+/* Alloc space in the current dma region.
+ *
+ * Reserves room for 'nverts' vertices of 'vsize' bytes each, refilling the
+ * region first when the request does not fit, and makes sure the vertex
+ * flush callback is armed.  Returns a pointer to the reserved space.
+ */
+static __inline void *r200AllocDmaLowVerts( r200ContextPtr rmesa,
+                                             int nverts, int vsize )
+{
+   GLuint nbytes = vsize * nverts;
+   GLubyte *space;
+
+   if ( rmesa->dma.current.ptr + nbytes > rmesa->dma.current.end )
+      r200RefillCurrentDmaRegion( rmesa );
+
+   /* Arm the flush callback on the first allocation of a run. */
+   if (!rmesa->dma.flush) {
+      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+      rmesa->dma.flush = flush_last_swtcl_prim;
+   }
+
+   ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+   ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+   ASSERT( rmesa->dma.current.start +
+          rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+          rmesa->dma.current.ptr );
+
+   /* Hand out the space at the write pointer, then advance it. */
+   space = rmesa->dma.current.address + rmesa->dma.current.ptr;
+   rmesa->dma.current.ptr += nbytes;
+   rmesa->swtcl.numverts += nverts;
+   return space;
+}
+
+
+
+
+/* Emit vertices [start, count) straight into the current dma region using
+ * the active setup's emit function; the destination stride is the vertex
+ * size in bytes.
+ */
+void r200_emit_contiguous_verts( GLcontext *ctx, GLuint start, GLuint count )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint vert_bytes = rmesa->swtcl.vertex_size * 4;
+   CARD32 *out = r200AllocDmaLowVerts( rmesa, count-start, vert_bytes );
+
+   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count,
+                                            out, vert_bytes );
+}
+
+
+
+/* Allocate the swtcl.indexed_verts dma region for vertices [start, count)
+ * and emit them into it with the active setup's emit function.  The region
+ * is requested with an alignment argument of 64.
+ */
+void r200_emit_indexed_verts( GLcontext *ctx, GLuint start, GLuint count )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint vert_bytes = rmesa->swtcl.vertex_size * 4;
+
+   r200AllocDmaRegionVerts( rmesa,
+                            &rmesa->swtcl.indexed_verts,
+                            count - start,
+                            vert_bytes,
+                            64 );
+
+   /* The region's address and start are only valid after the allocation. */
+   setup_tab[rmesa->swtcl.SetupIndex].emit(
+      ctx, start, count,
+      rmesa->swtcl.indexed_verts.address + rmesa->swtcl.indexed_verts.start,
+      vert_bytes );
+}
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+/* Primitive capabilities advertised to the render template; only
+ * HAVE_TRI_STRIP_1 is switched off.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       1
+#define HAVE_QUAD_STRIPS 1
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        1
+
+/* GL primitive mode -> hardware primitive code, indexed by the GLenum
+ * value (GL_POINTS == 0 ... GL_POLYGON == 9).  Slot 2 (GL_LINE_LOOP) is 0:
+ * no line-loop primitive is listed for the hardware.
+ */
+static const GLuint hw_prim[GL_POLYGON+1] = {
+   R200_VF_PRIM_POINTS,            /* GL_POINTS */
+   R200_VF_PRIM_LINES,             /* GL_LINES */
+   0,                              /* GL_LINE_LOOP */
+   R200_VF_PRIM_LINE_STRIP,        /* GL_LINE_STRIP */
+   R200_VF_PRIM_TRIANGLES,         /* GL_TRIANGLES */
+   R200_VF_PRIM_TRIANGLE_STRIP,    /* GL_TRIANGLE_STRIP */
+   R200_VF_PRIM_TRIANGLE_FAN,      /* GL_TRIANGLE_FAN */
+   R200_VF_PRIM_QUADS,             /* GL_QUADS */
+   R200_VF_PRIM_QUAD_STRIP,        /* GL_QUAD_STRIP */
+   R200_VF_PRIM_POLYGON            /* GL_POLYGON */
+};
+
+/* Start a new non-indexed primitive of GL mode 'prim': record its hardware
+ * code and check that the current dma region holds no unflushed vertices.
+ */
+static __inline void r200DmaPrimitive( r200ContextPtr rmesa, GLenum prim )
+{
+   const GLuint hwprim = hw_prim[prim];
+
+   R200_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hwprim;
+   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
+}
+
+/* Start a new indexed primitive of GL mode 'prim': record its hardware
+ * code with the R200_VF_PRIM_WALK_IND flag added.
+ */
+static __inline void r200EltPrimitive( r200ContextPtr rmesa, GLenum prim )
+{
+   const GLuint hwprim = hw_prim[prim] | R200_VF_PRIM_WALK_IND;
+
+   R200_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hwprim;
+}
+
+
+/* Render [start, count) through the tnl module's generic vertex path:
+ * notify it of the primitive mode, rebuild every vertex input, then call
+ * the PrimTabVerts entry for that mode.  Afterwards VERT_BIT_CLIP is left
+ * in swtcl.SetupNewInputs.
+ */
+static void VERT_FALLBACK( GLcontext *ctx,
+                          GLuint start,
+                          GLuint count,
+                          GLuint flags )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint mode = flags & PRIM_MODE_MASK;
+
+   tnl->Driver.Render.PrimitiveNotify( ctx, mode );
+   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
+   /* The table is read only now, after the two callbacks above. */
+   tnl->Driver.Render.PrimTabVerts[mode]( ctx, start, count, flags );
+   R200_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_CLIP;
+}
+
+/* Render [start, count) through the tnl module's generic element path:
+ * notify it of the primitive mode, rebuild every vertex input, then call
+ * the PrimTabElts entry for that mode.  Afterwards VERT_BIT_CLIP is left
+ * in swtcl.SetupNewInputs.
+ */
+static void ELT_FALLBACK( GLcontext *ctx,
+                         GLuint start,
+                         GLuint count,
+                         GLuint flags )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint mode = flags & PRIM_MODE_MASK;
+
+   tnl->Driver.Render.PrimitiveNotify( ctx, mode );
+   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
+   /* The table is read only now, after the two callbacks above. */
+   tnl->Driver.Render.PrimTabElts[mode]( ctx, start, count, flags );
+   R200_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_CLIP;
+}
+
+
+/* Hooks for the dma render code.  Every macro that mentions 'rmesa' relies
+ * on LOCAL_VARS having been expanded in the enclosing function, and the
+ * element macros rely on the 'dest' cursor declared by ELTS_VARS.
+ * NOTE(review): presumably consumed by the template included after the
+ * TAG definition below -- confirm.
+ */
+#define LOCAL_VARS r200ContextPtr rmesa = R200_CONTEXT(ctx)
+#define ELTS_VARS  GLushort *dest
+#define INIT( prim ) r200DmaPrimitive( rmesa, prim )
+#define ELT_INIT(prim) r200EltPrimitive( rmesa, prim )
+#define NEW_PRIMITIVE()  R200_NEWPRIM( rmesa )
+#define NEW_BUFFER()  r200RefillCurrentDmaRegion( rmesa )
+/* Vertices that still fit in the current dma region: bytes left between
+ * the write pointer and the end, divided by the vertex size in bytes.
+ */
+#define GET_CURRENT_VB_MAX_VERTS() \
+  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
+/* Vertices that fit in a whole RADEON_BUFFER_SIZE buffer. */
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
+
+/* Element capacity of the command buffer; each element takes 2 bytes.
+ * NOTE(review): the 16 and 1024 byte reserves are presumably room for
+ * command headers -- confirm.
+ */
+#define GET_CURRENT_VB_MAX_ELTS() \
+  ((R200_CMD_BUF_SZ - (rmesa->store.cmd_used + 16)) / 2)
+#define GET_SUBSEQUENT_VB_MAX_ELTS() \
+  ((R200_CMD_BUF_SZ - 1024) / 2)
+
+
+
+/* How do you extend an existing primitive?
+ */
+#define ALLOC_ELTS(nr)                                                 \
+do {                                                                   \
+   if (rmesa->dma.flush == r200FlushElts &&                            \
+       rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) {               \
+                                                                       \
+      dest = (GLushort *)(rmesa->store.cmd_buf +                       \
+                         rmesa->store.cmd_used);                       \
+      rmesa->store.cmd_used += nr*2;                                   \
+   }                                                                   \
+   else {                                                              \
+      if (rmesa->dma.flush) {                                          \
+        rmesa->dma.flush( rmesa );                                     \
+      }                                                                        \
+                                                                       \
+      r200EmitVertexAOS( rmesa,                                        \
+                          rmesa->swtcl.vertex_size,                    \
+                          (rmesa->r200Screen->agp_buffer_offset +      \
+                           rmesa->swtcl.indexed_verts.buf->buf->idx *  \
+                           RADEON_BUFFER_SIZE +                        \
+                           rmesa->swtcl.indexed_verts.start));         \
+                                                                       \
+      dest = r200AllocEltsOpenEnded( rmesa,                            \
+                                      rmesa->swtcl.hw_primitive,       \
+                                      nr );                            \
+   }                                                                   \
+} while (0)
+
+#define ALLOC_ELTS_NEW_PRIMITIVE(nr) ALLOC_ELTS( nr )
+
+#ifdef MESA_BIG_ENDIAN
+/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
+#define EMIT_ELT(offset, x) do {                                \
+        int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 );     \
+        GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 );    \
+        (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); } while (0)
+#else
+#define EMIT_ELT(offset, x) (dest)[offset] = (GLushort) (x)
+#endif
+#define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
+#define INCR_ELTS( nr ) dest += nr
+#define RELEASE_ELT_VERTS() \
+  r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ )
+#define EMIT_VERTS( ctx, j, nr ) \
+  r200_emit_contiguous_verts(ctx, j, (j)+(nr))
+#define EMIT_INDEXED_VERTS( ctx, start, count ) \
+  r200_emit_indexed_verts( ctx, start, count )
+
+
+#define TAG(x) r200_dma_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* `run' hook of the r200 render stage.
+ *
+ * Renders the current vertex buffer through the r200_dma_* tables when
+ * none of the conditions checked below applies.  Returns GL_TRUE to let
+ * the remaining pipeline stages handle the buffer, GL_FALSE once it has
+ * been drawn here.
+ */
+static GLboolean r200_run_render( GLcontext *ctx,
+                                   struct gl_pipeline_stage *stage )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   render_func *render = TAG(render_tab_verts);
+   GLuint first = 0, len, prim;
+
+   /* Drop previously emitted indexed vertices when this buffer has no
+    * element list or when the stage inputs have changed.
+    */
+   if (rmesa->swtcl.indexed_verts.buf) {
+      if (!VB->Elts || stage->changed_inputs)
+         RELEASE_ELT_VERTS();
+   }
+
+   /* Conditions under which this stage declines the buffer. */
+   if (R200_DEBUG & DEBUG_VERTS)
+      return GL_TRUE;                  /* vertex debugging requested */
+   if (VB->ClipOrMask)
+      return GL_TRUE;                  /* clipping needed */
+   if (rmesa->swtcl.RenderIndex != 0)
+      return GL_TRUE;                  /* per-vertex manipulations needed */
+   if (ctx->Line.StippleFlag)
+      return GL_TRUE;                  /* line stipple -- fix me? */
+
+   if (VB->Elts) {
+      render = TAG(render_tab_elts);
+      if (!rmesa->swtcl.indexed_verts.buf &&
+          !TAG(emit_elt_verts)(ctx, 0, VB->Count))
+         return GL_TRUE;               /* too many vertices */
+   }
+
+   tnl->Driver.Render.Start( ctx );
+
+   /* Walk the primitive list; the entry flagged PRIM_LAST ends it. */
+   do {
+      prim = VB->Primitive[first];
+      len = VB->PrimitiveLength[first];
+
+      if (R200_DEBUG & DEBUG_PRIMS)
+         fprintf(stderr, "r200_render.c: prim %s %d..%d\n",
+                 _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK),
+                 first, first+len);
+
+      if (len)
+         render[prim & PRIM_MODE_MASK]( ctx, first, first + len, prim );
+
+      first += len;
+   } while (!(prim & PRIM_LAST));
+
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;            /* finished the pipe */
+}
+
+
+
+/* `check' hook of the r200 render stage: records in stage->inputs which
+ * vertex attributes the stage reads.  Position, clip coordinates and the
+ * primary colour are always listed; the rest only in GL_RENDER mode and
+ * only when the matching GL state is enabled.
+ */
+static void r200_check_render( GLcontext *ctx,
+                                struct gl_pipeline_stage *stage )
+{
+   stage->inputs = VERT_BIT_POS | VERT_BIT_CLIP | VERT_BIT_COLOR0;
+
+   if (ctx->RenderMode != GL_RENDER)
+      return;
+
+   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+      stage->inputs |= VERT_BIT_COLOR1;
+
+   if (ctx->Texture.Unit[0]._ReallyEnabled)
+      stage->inputs |= VERT_BIT_TEX0;
+
+   if (ctx->Texture.Unit[1]._ReallyEnabled)
+      stage->inputs |= VERT_BIT_TEX1;
+
+   if (ctx->Fog.Enabled)
+      stage->inputs |= VERT_BIT_FOG;
+}
+
+/* Destructor installed in _r200_render_stage.  The stage's private pointer
+ * is initialised to 0, so there is nothing to release here; the cast only
+ * marks the parameter as deliberately unused.
+ */
+static void dtr( struct gl_pipeline_stage *stage )
+{
+   (void)stage;
+}
+
+/* Pipeline stage descriptor for the render stage defined above. */
+const struct gl_pipeline_stage _r200_render_stage =
+{
+   "r200 render",
+   (_DD_NEW_SEPARATE_SPECULAR |
+    _NEW_TEXTURE|
+    _NEW_FOG|
+    _NEW_RENDERMODE),          /* re-check (new inputs) */
+   0,                          /* re-run (always runs) */
+   GL_TRUE,                    /* active */
+   0, 0,                       /* inputs (set in check_render), outputs */
+   0, 0,                       /* changed_inputs, private */
+   dtr,                                /* destructor (no-op) */
+   r200_check_render,          /* check - recomputes stage->inputs */
+   r200_run_render             /* run */
+};
+
+
+
+/**************************************************************************/
+
+/* Indexed by GL primitive mode (GL_POINTS .. GL_POLYGON): the R200
+ * point/line/triangle primitive that each mode is reduced to.
+ */
+static const GLuint reduced_hw_prim[GL_POLYGON+1] = {
+   R200_VF_PRIM_POINTS,         /* GL_POINTS */
+   R200_VF_PRIM_LINES,          /* GL_LINES */
+   R200_VF_PRIM_LINES,          /* GL_LINE_LOOP */
+   R200_VF_PRIM_LINES,          /* GL_LINE_STRIP */
+   R200_VF_PRIM_TRIANGLES,      /* GL_TRIANGLES */
+   R200_VF_PRIM_TRIANGLES,      /* GL_TRIANGLE_STRIP */
+   R200_VF_PRIM_TRIANGLES,      /* GL_TRIANGLE_FAN */
+   R200_VF_PRIM_TRIANGLES,      /* GL_QUADS */
+   R200_VF_PRIM_TRIANGLES,      /* GL_QUAD_STRIP */
+   R200_VF_PRIM_TRIANGLES       /* GL_POLYGON */
+};
+/* Forward declarations: these are used by the template code and macros
+ * below and are defined further down in this file.
+ */
+static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void r200RenderPrimitive( GLcontext *ctx, GLenum prim );
+static void r200ResetLineStipple( GLcontext *ctx );
+
+#undef HAVE_QUADS
+#define HAVE_QUADS 0
+
+#undef HAVE_QUAD_STRIPS
+#define HAVE_QUAD_STRIPS 0
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+/* Setup for tnl_dd/t_dd_triemit.h.  TAG(x) is r200_##x here; the
+ * r200_point / r200_line / r200_triangle / r200_quad functions used by
+ * the macros further down presumably come from this include.  VERT(x)
+ * turns a vertex index into an address inside swtcl.verts using
+ * vertex_stride_shift.  ALLOC_VERTS multiplies `size' by 4, so the size
+ * is presumably given in dwords -- TODO confirm.
+ */
+#undef LOCAL_VARS
+#define CTX_ARG r200ContextPtr rmesa
+#define CTX_ARG2 rmesa
+#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 )
+#define LOCAL_VARS                                             \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);           \
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;      \
+   const char *r200verts = (char *)rmesa->swtcl.verts;
+#define VERT(x) (r200Vertex *)(r200verts + (x << shift))
+#define VERTEX r200Vertex 
+#define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS))
+#define PRINT_VERTEX(v) r200_print_vertex(rmesa->glCtx, v)
+#undef TAG
+#define TAG(x) r200_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+/* Each macro forwards to the matching r200_* emit function and expects
+ * `rmesa' to be in scope at the point of use.
+ */
+#define QUAD( a, b, c, d ) r200_quad( rmesa, a, b, c, d )
+#define TRI( a, b, c )     r200_triangle( rmesa, a, b, c )
+#define LINE( a, b )       r200_line( rmesa, a, b )
+#define POINT( a )         r200_point( rmesa, a )
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+/* Bits combined into the index used for rast_tab. */
+#define R200_TWOSIDE_BIT       0x01
+#define R200_UNFILLED_BIT      0x02
+#define R200_MAX_TRIFUNC       0x04
+
+/* One entry per combination of the two bits above (R200_MAX_TRIFUNC
+ * entries in total); r200ChooseRenderState() copies the selected entry
+ * into tnl->Driver.Render.
+ */
+static struct {
+   points_func         points;
+   line_func           line;
+   triangle_func       triangle;
+   quad_func           quad;
+} rast_tab[R200_MAX_TRIFUNC];
+
+/* Switches and accessors for the t_dd templates included below.
+ * DO_UNFILLED and DO_TWOSIDE test IND, which is defined anew before every
+ * inclusion of t_dd_tritmp.h.
+ */
+#define DO_FALLBACK  0
+#define DO_UNFILLED (IND & R200_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & R200_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_OFFSET     0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_RGBA   1
+#define HAVE_SPEC   1
+#define HAVE_INDEX  0
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a < 0)
+#define GET_VERTEX(e) (rmesa->swtcl.verts + (e<<rmesa->swtcl.vertex_stride_shift))
+/* Colour accessors; `coloroffset', `color' and `v' are locals of the
+ * function the template expands into (see LOCAL_VARS(n) below).
+ */
+#define VERT_SET_RGBA( v, c )    v->ui[coloroffset] = LE32_TO_CPU(*(GLuint *)c)
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = CPU_TO_LE32(v[idx]->ui[coloroffset])
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = LE32_TO_CPU(color[idx])
+/* Specular accessors, active only when `havespec' is set.  NOTE(review):
+ * these expand to a bare `if' (dangling-else hazard at the use site) and
+ * save/restore uses the hard-coded dword index 5 -- presumably the
+ * specular dword of the larger vertex layout; confirm.
+ */
+#define VERT_SET_SPEC( v0, c )   if (havespec) {                       \
+                                       v0->v.specular.red   = (c)[0];  \
+                                       v0->v.specular.green = (c)[1];  \
+                                       v0->v.specular.blue  = (c)[2]; }
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) {                                       \
+                                       v0->v.specular.red   = v1->v.specular.red;      \
+                                       v0->v.specular.green = v1->v.specular.green;    \
+                                       v0->v.specular.blue  = v1->v.specular.blue; }
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = CPU_TO_LE32(v[idx]->ui[5])
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = LE32_TO_CPU(spec[idx])
+/* Drop the definitions left over from the earlier template sections. */
+#undef LOCAL_VARS
+#undef TAG
+#undef INIT
+/* Locals for the functions the templates below expand into: save areas
+ * for n colours and n specular values, plus the vertex-layout facts the
+ * VERT_* macros need.  coloroffset is 3 when vertex_size is 4 and 4
+ * otherwise; havespec is set when vertex_size exceeds 4.  The (void)
+ * casts silence unused-variable warnings.
+ */
+#define LOCAL_VARS(n)                                                  \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);                   \
+   GLuint color[n], spec[n];                                           \
+   GLuint coloroffset = (rmesa->swtcl.vertex_size == 4 ? 3 : 4);       \
+   GLboolean havespec = (rmesa->swtcl.vertex_size > 4);                        \
+   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+/* TAG(x) is the identity here, so the helpers from t_dd_unfilled.h keep
+ * their plain names; UNFILLED_TRI / UNFILLED_QUAD above refer to
+ * unfilled_tri / unfilled_quad.
+ */
+#define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim[x] )
+#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#undef TAG
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+/* t_dd_tritmp.h is included once per IND value, each time with its own
+ * TAG suffix.  init_rast_tab() below calls the init, init_twoside,
+ * init_unfilled and init_twoside_unfilled functions this produces.
+ * NOTE(review): IND and TAG are redefined without an #undef in between,
+ * so the template presumably undefines them itself -- confirm.
+ */
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R200_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R200_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R200_TWOSIDE_BIT|R200_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+/* Calls the init function of each of the four t_dd_tritmp.h
+ * instantiations above; presumably each one fills its own rast_tab slot.
+ * Invoked once, on the first r200InitSwtcl() call.
+ */
+static void init_rast_tab( void )
+{
+   init();
+   init_twoside();
+   init_unfilled();
+   init_twoside_unfilled();
+}
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+/* tnl/t_vb_rendertmp.h is included twice: with ELT(x) as the identity
+ * and an r200_..._verts TAG, then with ELT(x) as elt[x] and an
+ * r200_..._elts TAG.  r200ChooseRenderState() installs the resulting
+ * r200_render_tab_verts and r200_render_tab_elts tables.  Note that
+ * RENDER_POINTS advances its `start' argument itself, and that
+ * RESET_STIPPLE already ends in a semicolon.
+ */
+#define VERT(x) (r200Vertex *)(r200verts + (x << shift))
+#define RENDER_POINTS( start, count )          \
+   for ( ; start < count ; start++)            \
+      r200_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   r200_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   r200_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   r200_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do {                                   \
+   r200RenderPrimitive( ctx, x );                      \
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS                                             \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);           \
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;              \
+   const char *r200verts = (char *)rmesa->swtcl.verts;         \
+   const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;       \
+   const GLboolean stipple = ctx->Line.StippleFlag;            \
+   (void) elt; (void) stipple;
+#define RESET_STIPPLE  if ( stipple ) r200ResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) r200_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) r200_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+/* Install the rasterization entry points that match the current triangle
+ * caps (two-sided lighting, unfilled polygons) into tnl->Driver.Render.
+ * Does nothing unless a TCL fallback is active and no rasterization
+ * fallback is, or when the selection is already installed.
+ */
+void r200ChooseRenderState( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint sel = 0;
+
+   if (!rmesa->TclFallback || rmesa->Fallback)
+      return;
+
+   if (caps & DD_TRI_LIGHT_TWOSIDE)
+      sel |= R200_TWOSIDE_BIT;
+   if (caps & DD_TRI_UNFILLED)
+      sel |= R200_UNFILLED_BIT;
+
+   /* Nothing to do when this selection is already in place. */
+   if (sel == rmesa->swtcl.RenderIndex)
+      return;
+
+   tnl->Driver.Render.Points = rast_tab[sel].points;
+   tnl->Driver.Render.Line = rast_tab[sel].line;
+   tnl->Driver.Render.ClippedLine = rast_tab[sel].line;
+   tnl->Driver.Render.Triangle = rast_tab[sel].triangle;
+   tnl->Driver.Render.Quad = rast_tab[sel].quad;
+
+   if (sel != 0) {
+      /* Any special case goes through the generic tnl render tables. */
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+   }
+   else {
+      tnl->Driver.Render.PrimTabVerts = r200_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = r200_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = r200_fast_clipped_poly;
+   }
+
+   rmesa->swtcl.RenderIndex = sel;
+}
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+/* Record `hwprim' as the current hardware primitive.  When it differs
+ * from the one in effect, R200_NEWPRIM is invoked before the switch.
+ */
+static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (rmesa->swtcl.hw_primitive == hwprim)
+      return;
+
+   R200_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hwprim;
+}
+
+/* Remember the GL primitive being rendered and select the matching
+ * reduced hardware primitive.  The hardware primitive is left alone for
+ * triangle-class primitives while unfilled polygon mode is active.
+ */
+static void r200RenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   rmesa->swtcl.render_primitive = prim;
+
+   if (prim >= GL_TRIANGLES && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+      return;
+
+   r200RasterPrimitive( ctx, reduced_hw_prim[prim] );
+}
+/* Render.Finish hook (installed by r200InitSwtcl and r200Fallback); empty. */
+static void r200RenderFinish( GLcontext *ctx )
+{
+}
+/* Render.ResetLineStipple hook: invokes R200_STATECHANGE on the `lin'
+ * state atom.  NOTE(review): presumably re-emitting the line state is
+ * what restarts the hardware stipple pattern -- confirm.
+ */
+static void r200ResetLineStipple( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   R200_STATECHANGE( rmesa, lin );
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+/* Descriptions of the rasterization fallback bits; entry i describes
+ * bit (1 << i).
+ */
+static const char * const fallbackStrings[] = {
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glBlendEquation",
+   "glBlendFunc(mode != ADD)",
+   "R200_NO_RAST",
+   "Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)"
+};
+
+
+/* Return the description of a fallback bit for debug output.
+ *
+ * bit: a fallback mask; the position of its highest set bit selects the
+ *      table entry (0 and 1 both select the first entry).
+ *
+ * A bit position with no table entry yields a placeholder string rather
+ * than a read beyond the end of fallbackStrings.
+ */
+static const char *getFallbackString(GLuint bit)
+{
+   const GLuint count = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i = 0;
+
+   while (bit > 1) {
+      i++;
+      bit >>= 1;
+   }
+
+   if (i >= count)
+      return "unknown fallback";
+
+   return fallbackStrings[i];
+}
+
+
+/* Set (mode != 0) or clear (mode == 0) one rasterization fallback bit.
+ *
+ * The transition work is done only when the first bit gets set or when
+ * the only remaining bit gets cleared: on entry pending vertices are
+ * fired and swsetup is woken up, on exit swrast is flushed and the r200
+ * render hooks are put back.
+ */
+void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint previous = rmesa->Fallback;
+
+   if (!mode) {
+      rmesa->Fallback &= ~bit;
+
+      /* Still in fallback unless `bit' was the only one set. */
+      if (previous != bit)
+         return;
+
+      _swrast_flush( ctx );
+      tnl->Driver.Render.Start = r200RenderStart;
+      tnl->Driver.Render.PrimitiveNotify = r200RenderPrimitive;
+      tnl->Driver.Render.Finish = r200RenderFinish;
+      tnl->Driver.Render.BuildVertices = r200BuildVertices;
+      tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
+      TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE );
+
+      if (rmesa->TclFallback) {
+         /* These are already done if rmesa->TclFallback goes to
+          * zero above. But not if it doesn't (R200_NO_TCL for
+          * example?)
+          */
+         r200ChooseVertexState( ctx );
+         r200ChooseRenderState( ctx );
+      }
+
+      if (R200_DEBUG & DEBUG_FALLBACKS) {
+         fprintf(stderr, "R200 end rasterization fallback: 0x%x %s\n",
+                 bit, getFallbackString(bit));
+      }
+      return;
+   }
+
+   rmesa->Fallback |= bit;
+
+   /* Already in fallback: nothing more to switch over. */
+   if (previous != 0)
+      return;
+
+   R200_FIREVERTICES( rmesa );
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE );
+   _swsetup_Wakeup( ctx );
+   _tnl_need_projected_coords( ctx, GL_TRUE );
+   rmesa->swtcl.RenderIndex = ~0;
+
+   if (R200_DEBUG & DEBUG_FALLBACKS) {
+      fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n",
+              bit, getFallbackString(bit));
+   }
+}
+
+
+
+
+/* Cope with depth operations by drawing individual pixels as points???
+ *
+ * Draws a bitmap by emitting one point per set bit.
+ *
+ * ctx:           GL context.
+ * px, py:        position of the bitmap's lower-left corner; px is
+ *                offset by the drawable's x origin below, py is flipped
+ *                against the window height.
+ * width, height: bitmap size in pixels.
+ * unpack:        pixel-store state; LsbFirst and SkipPixels are read
+ *                here, the rest is consumed by _mesa_image_address().
+ * bitmap:        the packed bitmap data.
+ *
+ * TCL and the hardware viewport transform are switched off for the
+ * duration of the call and restored afterwards.
+ *
+ * NOTE(review): only x, y, z and the colour of `vert' are filled in;
+ * this presumably matches the vertex format selected below -- confirm.
+ */
+void
+r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+                 GLsizei width, GLsizei height,
+                 const struct gl_pixelstore_attrib *unpack,
+                 const GLubyte *bitmap )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLfloat *rc = ctx->Current.RasterColor;
+   GLint row, col;
+   r200Vertex vert;
+   GLuint orig_vte;
+   /* Signed on purpose: with an unsigned height, `h - (py+row) - 1' wraps
+    * around to a huge positive value instead of going negative when a row
+    * lies above the top of the window.
+    */
+   GLint h;
+
+
+   /* Turn off tcl.
+    */
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 1 );
+
+   /* Choose tiny vertex format
+    */
+   r200SetVertexFormat( ctx, R200_XYZW_BIT | R200_RGBA_BIT );
+
+   /* Ready for point primitives:
+    */
+   r200RenderPrimitive( ctx, GL_POINTS );
+
+   /* Turn off the hw viewport transformation:
+    */
+   R200_STATECHANGE( rmesa, vte );
+   orig_vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VPORT_X_SCALE_ENA |
+                                          R200_VPORT_Y_SCALE_ENA |
+                                          R200_VPORT_Z_SCALE_ENA |
+                                          R200_VPORT_X_OFFSET_ENA |
+                                          R200_VPORT_Y_OFFSET_ENA |
+                                          R200_VPORT_Z_OFFSET_ENA);
+
+   /* Turn off other stuff:  Stipple?, texture?, blending?, etc.
+    */
+
+
+   /* Populate the vertex
+    *
+    * Incorporate FOG into RGBA
+    */
+   if (ctx->Fog.Enabled) {
+      const GLfloat *fc = ctx->Fog.Color;
+      GLfloat color[4];
+      GLfloat f;
+
+      if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT)
+         f = _swrast_z_to_fogfactor(ctx, ctx->Current.Attrib[VERT_ATTRIB_FOG][0]);
+      else
+         f = _swrast_z_to_fogfactor(ctx, ctx->Current.RasterDistance);
+
+      /* Blend RGB towards the fog colour; alpha is left untouched. */
+      color[0] = f * rc[0] + (1.F - f) * fc[0];
+      color[1] = f * rc[1] + (1.F - f) * fc[1];
+      color[2] = f * rc[2] + (1.F - f) * fc[2];
+      color[3] = rc[3];
+
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.red,   color[0]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.green, color[1]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.blue,  color[2]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.alpha, color[3]);
+   }
+   else {
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.red,   rc[0]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.green, rc[1]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.blue,  rc[2]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.alpha, rc[3]);
+   }
+
+
+   vert.tv.z = ctx->Current.RasterPos[2];
+
+
+   /* Update window height
+    */
+   LOCK_HARDWARE( rmesa );
+   UNLOCK_HARDWARE( rmesa );
+   h = rmesa->dri.drawable->h + rmesa->dri.drawable->y;
+   px += rmesa->dri.drawable->x;
+
+   /* Clipping handled by existing mechanisms in r200_ioctl.c?
+    */
+   for (row=0; row<height; row++) {
+      /* The row's start address is recomputed on every iteration, so no
+       * end-of-row fix-up of `src' is needed after the column loops.
+       */
+      const GLubyte *src = (const GLubyte *)
+        _mesa_image_address( unpack, bitmap, width, height,
+                             GL_COLOR_INDEX, GL_BITMAP, 0, row, 0 );
+
+      if (unpack->LsbFirst) {
+         /* Lsb first */
+         GLubyte mask = 1U << (unpack->SkipPixels & 0x7);
+         for (col=0; col<width; col++) {
+            if (*src & mask) {
+              vert.tv.x = px+col;
+              vert.tv.y = h - (py+row) - 1;
+              r200_point( rmesa, &vert );
+            }
+           /* Step to the next byte once the top bit has been tested,
+            * then rotate the mask left by one.
+            */
+           src += (mask >> 7);
+           mask = ((mask << 1) & 0xff) | (mask >> 7);
+         }
+      }
+      else {
+         /* Msb first */
+         GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);
+         for (col=0; col<width; col++) {
+            if (*src & mask) {
+              vert.tv.x = px+col;
+              vert.tv.y = h - (py+row) - 1;
+              r200_point( rmesa, &vert );
+            }
+           /* Step to the next byte once the bottom bit has been tested,
+            * then rotate the mask right by one.
+            */
+           src += mask & 1;
+           mask = ((mask << 7) & 0xff) | (mask >> 1);
+         }
+      }
+   }
+
+   /* Fire outstanding vertices, restore state
+    */
+   R200_STATECHANGE( rmesa, vte );
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = orig_vte;
+
+   /* Unfallback
+    */
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 0 );
+
+   /* Need to restore vertexformat?
+    */
+   if (rmesa->TclFallback)
+      r200ChooseVertexState( ctx );
+}
+
+
+/* Flush through the tnl module first; when stored vertices are part of
+ * the request, R200_NEWPRIM is invoked on the driver context as well.
+ */
+void r200FlushVertices( GLcontext *ctx, GLuint flags )
+{
+   _tnl_flush_vertices( ctx, flags );
+
+   if (!(flags & FLUSH_STORED_VERTICES))
+      return;
+
+   R200_NEWPRIM( R200_CONTEXT( ctx ) );
+}
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+/* Per-context software-TCL setup: installs the r200 render hooks into
+ * the tnl module, allocates the vertex store and resets the cached
+ * render state.  The shared rast and setup tables are built on the first
+ * call only.
+ *
+ * NOTE(review): the vertex store holds vb.Size * 16 * 4 bytes, presumably
+ * 16 dwords per vertex; the ALIGN_MALLOC result is not checked.
+ */
+void r200InitSwtcl( GLcontext *ctx )
+{
+   static int firsttime = 1;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint nverts = tnl->vb.Size;
+
+   if (firsttime) {
+      init_rast_tab();
+      init_setup_tab();
+      firsttime = 0;
+   }
+
+   /* Vertex store and cached state. */
+   rmesa->swtcl.verts = (char *)ALIGN_MALLOC( nverts * 16 * 4, 32 );
+   rmesa->swtcl.RenderIndex = ~0;
+   rmesa->swtcl.render_primitive = GL_TRIANGLES;
+   rmesa->swtcl.hw_primitive = 0;
+
+   /* tnl driver hooks. */
+   tnl->Driver.Render.Start = r200RenderStart;
+   tnl->Driver.Render.Finish = r200RenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = r200RenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = r200BuildVertices;
+}
+
+/* Release what the software-TCL path holds for this context: the indexed
+ * vertex DMA region, the vertex store allocated by r200InitSwtcl() and
+ * the two ubyte colour arrays.  Freed pointers are reset to 0.
+ */
+void r200DestroySwtcl( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (rmesa->swtcl.indexed_verts.buf) 
+      r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ );
+
+   if (rmesa->swtcl.verts) {
+      ALIGN_FREE(rmesa->swtcl.verts);
+      rmesa->swtcl.verts = 0;
+   }
+
+   if (rmesa->UbyteSecondaryColor.Ptr) {
+      ALIGN_FREE(rmesa->UbyteSecondaryColor.Ptr);
+      rmesa->UbyteSecondaryColor.Ptr = 0;
+   }
+
+   if (rmesa->UbyteColor.Ptr) {
+      ALIGN_FREE(rmesa->UbyteColor.Ptr);
+      rmesa->UbyteColor.Ptr = 0;
+   }
+}
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h
new file mode 100644 (file)
index 0000000..9a36fb6
--- /dev/null
@@ -0,0 +1,80 @@
+/* $XFree86$ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_SWTCL_H__
+#define __R200_SWTCL_H__
+
+#include "mtypes.h"
+#include "swrast/swrast.h"
+#include "r200_context.h"
+
+extern void r200InitSwtcl( GLcontext *ctx );
+extern void r200DestroySwtcl( GLcontext *ctx );
+
+extern void r200FlushVertices( GLcontext *ctx, GLuint flags );
+extern void r200ChooseRenderState( GLcontext *ctx );
+extern void r200ChooseVertexState( GLcontext *ctx );
+
+extern void r200CheckTexSizes( GLcontext *ctx );
+
+extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+                                GLuint newinputs );
+
+extern void r200PrintSetupFlags(char *msg, GLuint flags );
+
+
+extern void r200_emit_contiguous_verts( GLcontext *ctx,
+                                         GLuint start,
+                                         GLuint count );
+
+extern void r200_emit_indexed_verts( GLcontext *ctx,
+                                      GLuint start,
+                                      GLuint count );
+
+extern void r200_translate_vertex( GLcontext *ctx, 
+                                    const r200Vertex *src, 
+                                    SWvertex *dst );
+
+extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v );
+
+extern void r200_import_float_colors( GLcontext *ctx );
+extern void r200_import_float_spec_colors( GLcontext *ctx );
+
+extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+                             GLsizei width, GLsizei height,
+                             const struct gl_pixelstore_attrib *unpack,
+                             const GLubyte *bitmap );
+
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
new file mode 100644 (file)
index 0000000..994221d
--- /dev/null
@@ -0,0 +1,549 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "colormac.h"
+#include "light.h"
+
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_tex.h"
+#include "r200_tcl.h"
+#include "r200_swtcl.h"
+#include "r200_maos.h"
+
+
+
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_LOOP   0
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0     /* hw quad verts in wrong order??? */
+#define HAVE_QUAD_STRIPS 1
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        1
+
+
+#define HW_POINTS           R200_VF_PRIM_POINTS
+#define HW_LINES            R200_VF_PRIM_LINES
+#define HW_LINE_LOOP        0
+#define HW_LINE_STRIP       R200_VF_PRIM_LINE_STRIP
+#define HW_TRIANGLES        R200_VF_PRIM_TRIANGLES
+#define HW_TRIANGLE_STRIP_0 R200_VF_PRIM_TRIANGLE_STRIP
+#define HW_TRIANGLE_STRIP_1 0
+#define HW_TRIANGLE_FAN     R200_VF_PRIM_TRIANGLE_FAN
+#define HW_QUADS            R200_VF_PRIM_QUADS
+#define HW_QUAD_STRIP       R200_VF_PRIM_QUAD_STRIP
+#define HW_POLYGON          R200_VF_PRIM_POLYGON
+
+
+/* Lookup table indexed by the low four bits of a hardware primitive
+ * code (see the per-entry comments for the code -> primitive mapping).
+ * r200TclPrimitive() reads it as discrete_prim[hw_prim & 0xf]: when the
+ * entry is zero a new hardware primitive is always started, even if the
+ * primitive type is unchanged from the previous call.
+ */
+static GLboolean discrete_prim[0x10] = {
+   0,                          /* 0 none */
+   1,                          /* 1 points */
+   1,                          /* 2 lines */
+   0,                          /* 3 line_strip */
+   1,                          /* 4 tri_list */
+   0,                          /* 5 tri_fan */
+   0,                          /* 6 tri_strip */
+   0,                          /* 7 tri_w_flags */
+   1,                          /* 8 rect list (unused) */
+   1,                          /* 9 3vert point */
+   1,                          /* a 3vert line */
+   0,                          /* b point sprite */
+   0,                          /* c line loop */
+   1,                          /* d quads */
+   0,                          /* e quad strip */
+   0,                          /* f polygon */
+};
+   
+
+#define LOCAL_VARS r200ContextPtr rmesa = R200_CONTEXT(ctx);rmesa = rmesa
+#define ELTS_VARS  GLushort *dest
+
+#define ELT_INIT(prim, hw_prim) \
+   r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND )
+
+#define GET_ELTS() rmesa->tcl.Elts
+
+
+#define NEW_PRIMITIVE()  R200_NEWPRIM( rmesa )
+#define NEW_BUFFER()  r200RefillCurrentDmaRegion( rmesa )
+
+/* Don't really know how many elts will fit in what's left of cmdbuf,
+ * as there is state to emit, etc:
+ */
+
+#if 0
+#define GET_CURRENT_VB_MAX_ELTS() \
+   ((R200_CMD_BUF_SZ - (rmesa->store.cmd_used + 16)) / 2) 
+#define GET_SUBSEQUENT_VB_MAX_ELTS() ((R200_CMD_BUF_SZ - 16) / 2) 
+#else
+/* Testing on isosurf shows a maximum around here.  Don't know if it's
+ * the card or driver or kernel module that is causing the behaviour.
+ */
+#define GET_CURRENT_VB_MAX_ELTS() 300
+#define GET_SUBSEQUENT_VB_MAX_ELTS() 300
+#endif
+
+#define RESET_STIPPLE() do {                   \
+   R200_STATECHANGE( rmesa, lin );             \
+   r200EmitState( rmesa );                     \
+} while (0)
+
+#define AUTO_STIPPLE( mode )  do {             \
+   R200_STATECHANGE( rmesa, lin );             \
+   if (mode)                                   \
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=        \
+        R200_LINE_PATTERN_AUTO_RESET;  \
+   else                                                \
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=        \
+        ~R200_LINE_PATTERN_AUTO_RESET; \
+   r200EmitState( rmesa );                     \
+} while (0)
+
+
+/* How do you extend an existing primitive?
+ *
+ * ALLOC_ELTS(nr) points `dest' at room for nr 16-bit element indices
+ * (2 bytes each):
+ *   - if dma.flush is r200FlushElts and the command buffer still has
+ *     room, the new elements are appended in place at cmd_used;
+ *   - otherwise any pending flush callback is run, the vertex arrays
+ *     are re-emitted with r200EmitAOS() and the space is obtained from
+ *     r200AllocEltsOpenEnded() for the current hw_primitive.
+ *
+ * Expects `rmesa' and `dest' to be in scope (LOCAL_VARS / ELTS_VARS).
+ * The argument is parenthesised on every use so that expression
+ * arguments such as ALLOC_ELTS( n+1 ) are scaled correctly.
+ */
+#define ALLOC_ELTS(nr)                                                 \
+do {                                                                   \
+   if (rmesa->dma.flush == r200FlushElts &&                            \
+       rmesa->store.cmd_used + (nr)*2 < R200_CMD_BUF_SZ) {             \
+                                                                       \
+      dest = (GLushort *)(rmesa->store.cmd_buf +                       \
+                         rmesa->store.cmd_used);                       \
+      rmesa->store.cmd_used += (nr)*2;                                 \
+   }                                                                   \
+   else {                                                              \
+      if (rmesa->dma.flush)                                            \
+        rmesa->dma.flush( rmesa );                                     \
+                                                                       \
+      r200EmitAOS( rmesa,                                              \
+                    rmesa->tcl.aos_components,                         \
+                    rmesa->tcl.nr_aos_components,                      \
+                    0 );                                               \
+                                                                       \
+      dest = r200AllocEltsOpenEnded( rmesa,                            \
+                                      rmesa->tcl.hw_primitive,         \
+                                      (nr) );                          \
+   }                                                                   \
+} while (0) 
+
+
+
+/* TODO: Try to extend existing primitive if both are identical,
+ * discrete and there are no intervening state changes.  (Somewhat
+ * duplicates changes to DrawArrays code)
+ */
+/* Emit one non-indexed primitive.  The vertex arrays are emitted
+ * starting at `start' and (count - start) vertices are drawn, so
+ * `count' is really the end index of the range, not a length.
+ */
+static void EMIT_PRIM( GLcontext *ctx,
+                      GLenum prim,
+                      GLuint hwprim,
+                      GLuint start,
+                      GLuint count)
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   const GLuint nr_verts = count - start;
+
+   /* Must come first: it may update rmesa->tcl.hw_primitive, which is
+    * what gets handed to r200EmitVbufPrim() below.
+    */
+   r200TclPrimitive( ctx, prim, hwprim );
+
+   /* Why couldn't this packet have taken an offset param?
+    */
+   r200EmitAOS( rmesa,
+                 rmesa->tcl.aos_components,
+                 rmesa->tcl.nr_aos_components,
+                 start );
+
+   r200EmitVbufPrim( rmesa, rmesa->tcl.hw_primitive, nr_verts );
+}
+
+
+
+/* Try & join small primitives
+ */
+#if 0
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
+#else
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )                   \
+  ((NR) < 20 ||                                                        \
+   ((NR) < 40 &&                                               \
+    rmesa->tcl.hw_primitive == (PRIM|                          \
+                           R200_VF_TCL_OUTPUT_VTX_ENABLE|      \
+                               R200_VF_PRIM_WALK_IND)))
+#endif
+
+#ifdef MESA_BIG_ENDIAN
+/* We could do without (most of) this ugliness if dest was always 32 bit word aligned...
+ *
+ * On big-endian hosts the two 16-bit elements inside each 32-bit word
+ * are stored swapped: `dest' is rounded down to a 4-byte boundary, the
+ * element index is adjusted for the rounding, and the store goes to
+ * index (off ^ 1).
+ *
+ * The address arithmetic is done in `unsigned long' rather than GLuint
+ * so that the pointer is not truncated to 32 bits on LP64 targets.
+ */
+#define EMIT_ELT(offset, x) do {                                        \
+        unsigned long dest_addr = (unsigned long)dest;                  \
+        int off = (offset) + (int)( ( dest_addr & 0x2 ) >> 1 );         \
+        GLushort *des = (GLushort *)( dest_addr & ~0x2UL );             \
+        (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); } while (0)
+#else
+#define EMIT_ELT(offset, x) (dest)[offset] = (GLushort) (x)
+#endif
+#define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
+#define INCR_ELTS( nr ) dest += nr
+#define RELEASE_ELT_VERTS() \
+   r200ReleaseArrays( ctx, ~0 )
+
+
+
+#define TAG(x) tcl_##x
+#include "tnl_dd/t_dd_dmatmp2.h"
+
+/**********************************************************************/
+/*                          External entrypoints                     */
+/**********************************************************************/
+
+/* Draw a non-indexed primitive by dispatching through
+ * tcl_render_tab_verts on the primitive-mode bits of `flags'.
+ * first/last/flags are forwarded to the render function unchanged.
+ */
+void r200EmitPrimitive( GLcontext *ctx,
+                         GLuint first,
+                         GLuint last,
+                         GLuint flags )
+{
+   const GLuint mode = flags & PRIM_MODE_MASK;
+
+   tcl_render_tab_verts[mode]( ctx, first, last, flags );
+}
+
+/* Indexed counterpart of r200EmitPrimitive(): dispatches through
+ * tcl_render_tab_elts on the primitive-mode bits of `flags'.
+ */
+void r200EmitEltPrimitive( GLcontext *ctx,
+                            GLuint first,
+                            GLuint last,
+                            GLuint flags )
+{
+   const GLuint mode = flags & PRIM_MODE_MASK;
+
+   tcl_render_tab_elts[mode]( ctx, first, last, flags );
+}
+
+/* Select the hardware primitive used for subsequent TCL rendering.
+ *
+ * hw_prim is OR'ed with R200_VF_TCL_OUTPUT_VTX_ENABLE and stored in
+ * rmesa->tcl.hw_primitive.  A new primitive is started (R200_NEWPRIM)
+ * unless the value is unchanged AND the primitive type is marked
+ * discrete in discrete_prim[].  `prim' is not used here.
+ */
+void r200TclPrimitive( GLcontext *ctx,
+                        GLenum prim,
+                        int hw_prim )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint wanted = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE;
+
+   /* Same discrete primitive as before: nothing to do. */
+   if (wanted == rmesa->tcl.hw_primitive && discrete_prim[hw_prim&0xf])
+      return;
+
+   R200_NEWPRIM( rmesa );
+   rmesa->tcl.hw_primitive = wanted;
+}
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* TCL render.
+ *
+ * Pipeline 'run' callback.  Walks the primitive list of the current
+ * vertex buffer and emits each primitive through the hardware TCL path,
+ * using the indexed entry point when the VB carries an element list.
+ *
+ * Returns GL_TRUE to let the remaining (software t&l) stages run when a
+ * TCL fallback is active, GL_FALSE once the pipeline is finished here
+ * (including the empty-VB case).
+ */
+static GLboolean r200_run_tcl_render( GLcontext *ctx,
+                                       struct gl_pipeline_stage *stage )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint i,flags = 0,length;
+
+   /* TODO: separate this from the swtnl pipeline 
+    */
+   if (rmesa->TclFallback)
+      return GL_TRUE;  /* fallback to software t&l */
+
+   if (R200_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (VB->Count == 0)
+      return GL_FALSE;
+
+   /* Drop every previously emitted array, then emit the ones this
+    * stage asked for in r200_check_tcl_render().
+    */
+   r200ReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
+   r200EmitArrays( ctx, stage->inputs );
+
+   rmesa->tcl.Elts = VB->Elts;
+
+   /* The loop ends after the entry flagged PRIM_LAST has been handled.
+    * NOTE(review): a zero-length entry that is NOT flagged PRIM_LAST
+    * would never advance `i'; this presumably relies on the TNL module
+    * never producing one -- confirm.
+    */
+   for (i = VB->FirstPrimitive ; !(flags & PRIM_LAST) ; i += length)
+   {
+      flags = VB->Primitive[i];
+      length = VB->PrimitiveLength[i];
+
+      if (R200_DEBUG & DEBUG_PRIMS)
+        fprintf(stderr, "%s: prim %s %d..%d\n", 
+                __FUNCTION__,
+                _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
+                i, i+length);
+
+      if (!length)
+        continue;
+
+      if (rmesa->tcl.Elts)
+        r200EmitEltPrimitive( ctx, i, i+length, flags );
+      else
+        r200EmitPrimitive( ctx, i, i+length, flags );
+   }
+
+   return GL_FALSE;            /* finished the pipe */
+}
+
+
+
+/* Pipeline 'check' callback.
+ *
+ * Validates driver state if any GL state is pending, then works out
+ * which vertex attributes the TCL stage needs (stage->inputs) from the
+ * lighting and texture-unit 0/1 state.  The stage is only active in
+ * GL_RENDER mode.
+ */
+static void r200_check_tcl_render( GLcontext *ctx,
+                                    struct gl_pipeline_stage *stage )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint inputs = VERT_BIT_POS;
+
+   /* Validate state:
+    */
+   if (rmesa->NewGLState)
+      r200ValidateState( ctx );
+
+   if (ctx->RenderMode == GL_RENDER) {
+      /* Make all this event-driven:
+       */
+      if (ctx->Light.Enabled) {
+        inputs |= VERT_BIT_NORMAL;
+
+        /* The "1 ||" forces this branch: COLOR0 is always requested
+         * when lighting is on, regardless of ColorMaterialEnabled.
+         */
+        if (1 || ctx->Light.ColorMaterialEnabled) {
+           inputs |= VERT_BIT_COLOR0;
+        }
+      }
+      else {
+        inputs |= VERT_BIT_COLOR0;
+        
+        if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+           inputs |= VERT_BIT_COLOR1;
+        }
+      }
+
+      /* Per enabled texture unit: with texgen on, texcoords are not
+       * requested from the application (only normals, if this texgen
+       * mode needs them); otherwise the unit's texcoord array is needed.
+       */
+      if (ctx->Texture.Unit[0]._ReallyEnabled) {
+        if (ctx->Texture.Unit[0].TexGenEnabled) {
+           if (rmesa->TexGenNeedNormals[0]) {
+              inputs |= VERT_BIT_NORMAL;
+           }
+        } else {
+           inputs |= VERT_BIT_TEX0;
+        }
+      }
+
+      if (ctx->Texture.Unit[1]._ReallyEnabled) {
+        if (ctx->Texture.Unit[1].TexGenEnabled) {
+           if (rmesa->TexGenNeedNormals[1]) {
+              inputs |= VERT_BIT_NORMAL;
+           }
+        } else {
+           inputs |= VERT_BIT_TEX1;
+        }
+      }
+
+      stage->inputs = inputs;
+      stage->active = 1;
+   }
+   else
+      stage->active = 0;
+}
+
+/* First-time 'check' callback: installs r200_check_tcl_render as the
+ * stage's check function and immediately runs it once.
+ */
+static void r200_init_tcl_render( GLcontext *ctx,
+                                   struct gl_pipeline_stage *stage )
+{
+   stage->check = r200_check_tcl_render;
+   r200_check_tcl_render( ctx, stage );
+}
+
+/* Stage destructor: intentionally a no-op (the argument is only
+ * referenced to silence unused-parameter warnings).
+ */
+static void dtr( struct gl_pipeline_stage *stage )
+{
+   (void)stage;
+}
+
+
+/* Initial state for tcl stage.  
+ *
+ * Positional initializer: the field order must match
+ * struct gl_pipeline_stage.  The 'check' slot initially points at
+ * r200_init_tcl_render, which replaces itself with
+ * r200_check_tcl_render on first use.
+ */
+const struct gl_pipeline_stage _r200_tcl_stage =
+{
+   "r200 render",
+   (_DD_NEW_SEPARATE_SPECULAR |
+    _NEW_LIGHT|
+    _NEW_TEXTURE|
+    _NEW_FOG|
+    _NEW_RENDERMODE),          /* re-check (new inputs) */
+   0,                          /* re-run (always runs) */
+   GL_TRUE,                    /* active */
+   0, 0,                       /* inputs (set in check_render), outputs */
+   0, 0,                       /* changed_inputs, private */
+   dtr,                                /* destructor */
+   r200_init_tcl_render,       /* check - initially set to alloc data */
+   r200_run_tcl_render /* run */
+};
+
+
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+
+/*-----------------------------------------------------------------------
+ * Manage TCL fallbacks
+ */
+
+
+/* Switch from hardware TCL to the software t&l path.
+ *
+ * Starts a new primitive, resets the swtcl vertex format so it is
+ * re-chosen, revalidates the lighting tables (and routes material
+ * change notifications to that validator), releases the TCL vertex
+ * arrays and reprograms the vap/vte/set state atoms with TCL disabled.
+ * Called by r200TclFallback() when the first fallback bit is raised.
+ */
+static void transition_to_swtnl( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   R200_NEWPRIM( rmesa );
+   rmesa->swtcl.vertex_format = 0;
+
+   r200ChooseVertexState( ctx );
+   r200ChooseRenderState( ctx );
+
+   _mesa_validate_all_lighting_tables( ctx ); 
+
+   tnl->Driver.NotifyMaterialChange = 
+      _mesa_validate_all_lighting_tables;
+
+   r200ReleaseArrays( ctx, ~0 );
+
+   /* Still using the D3D based hardware-rasterizer from the radeon;
+    * need to put the card into D3D mode to make it work:
+    */
+   R200_STATECHANGE( rmesa, vap );
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_TCL_ENABLE;
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_D3D_TEX_DEFAULT;
+
+   R200_STATECHANGE( rmesa, vte );
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~R200_VTX_W0_FMT;
+
+   R200_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_RE_CNTL] |= (R200_VTX_STQ0_D3D |
+                                     R200_VTX_STQ1_D3D);
+}
+
+
+/* Switch back from software t&l to hardware TCL.
+ *
+ * Mirrors transition_to_swtnl(): material changes are routed to
+ * r200UpdateMaterial again, any pending DMA is flushed and the flush
+ * callback cleared, the swtcl indexed-vertex DMA region is released if
+ * held, and the vap/vte/set state atoms are reprogrammed with TCL
+ * enabled.  Called by r200TclFallback() when the last fallback bit is
+ * cleared.
+ */
+static void transition_to_hwtnl( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   _tnl_need_projected_coords( ctx, GL_FALSE );
+
+   r200UpdateMaterial( ctx );
+
+   tnl->Driver.NotifyMaterialChange = r200UpdateMaterial;
+
+   /* Flush whatever the software path still has queued before the
+    * callback is cleared.
+    */
+   if ( rmesa->dma.flush )                     
+      rmesa->dma.flush( rmesa );       
+
+   rmesa->dma.flush = 0;
+   rmesa->swtcl.vertex_format = 0;
+   
+   if (rmesa->swtcl.indexed_verts.buf) 
+      r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+                             __FUNCTION__ );
+
+   R200_STATECHANGE( rmesa, vap );
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE;
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~(R200_VAP_FORCE_W_TO_ONE |
+                                          R200_VAP_D3D_TEX_DEFAULT);
+
+   R200_STATECHANGE( rmesa, vte );
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VTX_XY_FMT|R200_VTX_Z_FMT);
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] |= R200_VTX_W0_FMT;
+
+   R200_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_RE_CNTL] &= ~(R200_VTX_STQ0_D3D |
+                                      R200_VTX_STQ1_D3D);
+
+
+   /* r200TclFallback() prints its own "end tcl fallback" line (with
+    * the fallback name) just before calling this function.
+    */
+   if (R200_DEBUG & DEBUG_FALLBACKS) 
+      fprintf(stderr, "R200 end tcl fallback\n");
+}
+
+
+/* Human-readable names for the R200_TCL_FALLBACK_* bits declared in
+ * r200_tcl.h, indexed by bit position (0x1 -> entry 0, 0x100 -> entry
+ * 8).  Keep this table in step with those defines.
+ */
+static char *fallbackStrings[] = {
+   "Rasterization fallback",
+   "Unfilled triangles",
+   "Twosided lighting, differing materials",
+   "Materials in VB (maybe between begin/end)",
+   "Texgen unit 0",
+   "Texgen unit 1",
+   "Texgen unit 2",
+   "User disable",
+   "Bitmap as points"
+};
+
+
+/* Return the description of a fallback bit for debug output.
+ *
+ * \param bit  a fallback mask; the position of its highest set bit
+ *             selects the string.
+ * \return the matching entry of fallbackStrings[], or a generic string
+ *         when the bit has no entry (instead of reading past the end of
+ *         the table).
+ */
+static char *getFallbackString(GLuint bit)
+{
+   GLuint i = 0;
+   while (bit > 1) {
+      i++;
+      bit >>= 1;
+   }
+   if (i >= sizeof(fallbackStrings) / sizeof(fallbackStrings[0]))
+      return "Unknown fallback";
+   return fallbackStrings[i];
+}
+
+
+
+/* Raise or clear one TCL fallback condition.
+ *
+ * \param bit   fallback mask bit to change
+ * \param mode  non-zero to raise the fallback, zero to clear it
+ *
+ * rmesa->TclFallback accumulates the active bits.  The switch to
+ * software t&l happens only when the first bit is raised (mask was 0),
+ * and the switch back to hardware TCL only when the bit being cleared
+ * was the sole bit set.
+ */
+void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->TclFallback;
+
+   if (mode) {
+      rmesa->TclFallback |= bit;
+      if (oldfallback == 0) {
+        if (R200_DEBUG & DEBUG_FALLBACKS) 
+           fprintf(stderr, "R200 begin tcl fallback %s\n",
+                   getFallbackString( bit ));
+        transition_to_swtnl( ctx );
+      }
+   }
+   else {
+      rmesa->TclFallback &= ~bit;
+      if (oldfallback == bit) {
+        if (R200_DEBUG & DEBUG_FALLBACKS) 
+           fprintf(stderr, "R200 end tcl fallback %s\n",
+                   getFallbackString( bit ));
+        transition_to_hwtnl( ctx );
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.h b/src/mesa/drivers/dri/r200/r200_tcl.h
new file mode 100644 (file)
index 0000000..a2db2e1
--- /dev/null
@@ -0,0 +1,67 @@
+/* $XFree86$ */
+/**************************************************************************
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_TCL_H__
+#define __R200_TCL_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "r200_context.h"
+
+extern void r200TclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim );
+extern void r200EmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+                                   GLuint flags );
+extern void r200EmitPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+                                GLuint flags );
+
+extern void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+                                             
+#define R200_TCL_FALLBACK_RASTER            0x1 /* rasterization */
+#define R200_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
+#define R200_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
+#define R200_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
+#define R200_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
+#define R200_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
+#define R200_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
+#define R200_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
+#define R200_TCL_FALLBACK_BITMAP            0x100 /* draw bitmap with points */
+
+#define R200_MAX_TCL_VERTSIZE (4*4) /* using maos now... */
+
+#define TCL_FALLBACK( ctx, bit, mode ) r200TclFallback( ctx, bit, mode )
+
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
new file mode 100644 (file)
index 0000000..5f8d06a
--- /dev/null
@@ -0,0 +1,1002 @@
+/* $XFree86$ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "context.h"
+#include "enums.h"
+#include "image.h"
+#include "simple_list.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "texutil.h"
+#include "texmem.h"
+#include "teximage.h"
+
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_swtcl.h"
+#include "r200_tex.h"
+
+
+
+/**
+ * Set the texture wrap modes.
+ * 
+ * The s and t modes are encoded in \c pp_txfilter, the r mode in the
+ * Q clamp field of \c pp_txformat_x.  An unrecognised mode is reported
+ * through _mesa_problem() and leaves the corresponding field cleared.
+ * Also updates \c border_fallback.
+ *
+ * \param t Texture object whose wrap modes are to be set
+ * \param swrap Wrap mode for the \a s texture coordinate
+ * \param twrap Wrap mode for the \a t texture coordinate
+ * \param rwrap Wrap mode for the \a r texture coordinate
+ */
+
+static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
+{
+   GLboolean  is_clamp = GL_FALSE;
+   GLboolean  is_clamp_to_border = GL_FALSE;
+
+   t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
+
+   switch ( swrap ) {
+   case GL_REPEAT:
+      t->pp_txfilter |= R200_CLAMP_S_WRAP;
+      break;
+   case GL_CLAMP:
+      t->pp_txfilter |= R200_CLAMP_S_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->pp_txfilter |= R200_CLAMP_S_CLAMP_LAST;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      /* R200_BORDER_MODE_D3D is added once at the end of the function
+       * for any coordinate using CLAMP_TO_BORDER.
+       */
+      t->pp_txfilter |= R200_CLAMP_S_CLAMP_GL;
+      is_clamp_to_border = GL_TRUE;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txfilter |= R200_CLAMP_S_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_ATI:
+      t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_ATI:
+      t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_LAST;
+      break;
+   default:
+      _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
+   }
+
+   switch ( twrap ) {
+   case GL_REPEAT:
+      t->pp_txfilter |= R200_CLAMP_T_WRAP;
+      break;
+   case GL_CLAMP:
+      t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->pp_txfilter |= R200_CLAMP_T_CLAMP_LAST;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL | R200_BORDER_MODE_D3D;
+      is_clamp_to_border = GL_TRUE;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txfilter |= R200_CLAMP_T_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_ATI:
+      t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_ATI:
+      t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_LAST;
+      break;
+   default:
+      _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+   }
+
+   /* The r coordinate's mode is kept in a different register word. */
+   t->pp_txformat_x &= ~R200_CLAMP_Q_MASK;
+
+   switch ( rwrap ) {
+   case GL_REPEAT:
+      t->pp_txformat_x |= R200_CLAMP_Q_WRAP;
+      break;
+   case GL_CLAMP:
+      t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_LAST;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_GL;
+      is_clamp_to_border = GL_TRUE;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txformat_x |= R200_CLAMP_Q_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_ATI:
+      t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_ATI:
+      t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_LAST;
+      break;
+   default:
+      _mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__);
+   }
+
+   if ( is_clamp_to_border ) {
+      t->pp_txfilter |= R200_BORDER_MODE_D3D;
+   }
+
+   /* Set when GL_CLAMP-style and CLAMP_TO_BORDER modes are mixed on one
+    * texture.  NOTE(review): presumably because the single
+    * R200_BORDER_MODE_D3D bit cannot satisfy both at once -- confirm.
+    */
+   t->border_fallback = (is_clamp && is_clamp_to_border);
+}
+
+/* Encode the maximum anisotropy into the R200_MAX_ANISO field of
+ * pp_txfilter.  Exactly 1.0 selects 1:1; other values select the
+ * first of 2:1, 4:1, 8:1 whose limit is not exceeded, else 16:1.
+ */
+static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
+{
+   GLuint aniso_bits;
+
+   if ( max == 1.0 )
+      aniso_bits = R200_MAX_ANISO_1_TO_1;
+   else if ( max <= 2.0 )
+      aniso_bits = R200_MAX_ANISO_2_TO_1;
+   else if ( max <= 4.0 )
+      aniso_bits = R200_MAX_ANISO_4_TO_1;
+   else if ( max <= 8.0 )
+      aniso_bits = R200_MAX_ANISO_8_TO_1;
+   else
+      aniso_bits = R200_MAX_ANISO_16_TO_1;
+
+   t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
+   t->pp_txfilter |= aniso_bits;
+}
+
+/**
+ * Set the texture magnification and minification modes.
+ * 
+ * The minification encoding depends on the anisotropy level already
+ * stored in \c pp_txfilter, so r200SetTexMaxAnisotropy() has to be
+ * called first.  An unrecognised \a minf or \a magf leaves the
+ * corresponding field cleared.
+ *
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ */
+
+static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
+{
+   GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK);
+
+   t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK);
+   t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK;
+
+   if ( anisotropy == R200_MAX_ANISO_1_TO_1 ) {
+      /* NOTE(review): for the two mixed mipmap modes the GL name and
+       * the register name have their words in opposite order;
+       * presumably the register names list the between-level filter
+       * first -- confirm against r200_reg.h before changing.
+       */
+      switch ( minf ) {
+      case GL_NEAREST:
+        t->pp_txfilter |= R200_MIN_FILTER_NEAREST;
+        break;
+      case GL_LINEAR:
+        t->pp_txfilter |= R200_MIN_FILTER_LINEAR;
+        break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+        t->pp_txfilter |= R200_MIN_FILTER_NEAREST_MIP_NEAREST;
+        break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+        t->pp_txfilter |= R200_MIN_FILTER_LINEAR_MIP_NEAREST;
+        break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+        t->pp_txfilter |= R200_MIN_FILTER_NEAREST_MIP_LINEAR;
+        break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+        t->pp_txfilter |= R200_MIN_FILTER_LINEAR_MIP_LINEAR;
+        break;
+      }
+   } else {
+      /* Anisotropic filtering: only four encodings are used, so the
+       * six GL modes are folded onto them.
+       */
+      switch ( minf ) {
+      case GL_NEAREST:
+        t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST;
+        break;
+      case GL_LINEAR:
+        t->pp_txfilter |= R200_MIN_FILTER_ANISO_LINEAR;
+        break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+      case GL_LINEAR_MIPMAP_NEAREST:
+        t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST;
+        break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+      case GL_LINEAR_MIPMAP_LINEAR:
+        t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR;
+        break;
+      }
+   }
+
+   /* Note we don't have 3D mipmaps so only use the mag filter setting
+    * to set the 3D texture filter mode.
+    */
+   switch ( magf ) {
+   case GL_NEAREST:
+      t->pp_txfilter |= R200_MAG_FILTER_NEAREST;
+      t->pp_txformat_x |= R200_VOLUME_FILTER_NEAREST;
+      break;
+   case GL_LINEAR:
+      t->pp_txfilter |= R200_MAG_FILTER_LINEAR;
+      t->pp_txformat_x |= R200_VOLUME_FILTER_LINEAR;
+      break;
+   }
+}
+
+/* Pack the four border colour channels with r200PackColor() and store
+ * the result in pp_border_color.  (The leading 4 is presumably the
+ * bytes-per-pixel selector of r200PackColor -- confirm.)
+ */
+static void r200SetTexBorderColor( r200TexObjPtr t, GLubyte c[4] )
+{
+   const GLuint packed = r200PackColor( 4, c[0], c[1], c[2], c[3] );
+
+   t->pp_border_color = packed;
+}
+
+
+/**
+ * Allocate and initialise the driver-private texture object for a Mesa
+ * texture object.
+ *
+ * The new object is zero-filled, linked to \a texObj, and its wrap,
+ * anisotropy, filter and border-colour state is derived from the
+ * current parameters of \a texObj.  No image data is uploaded here.
+ *
+ * \param texObj Mesa texture object; its \c DriverData is set to the
+ *               new object (or to NULL if the allocation failed).
+ * \return the new driver texture object, or NULL on allocation failure.
+ */
+
+static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj )
+{
+   r200TexObjPtr t;
+
+   t = CALLOC_STRUCT( r200_tex_obj );
+   texObj->DriverData = t;
+   if ( t != NULL ) {
+      if ( R200_DEBUG & DEBUG_TEXTURE ) {
+        fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, texObj, t );
+      }
+
+      /* Initialize non-image-dependent parts of the state:
+       */
+      t->base.tObj = texObj;
+      t->border_fallback = GL_FALSE;
+
+      make_empty_list( & t->base );
+
+      /* Anisotropy must be set before the filter: r200SetTexFilter()
+       * reads the anisotropy bits back out of pp_txfilter.
+       */
+      r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR );
+      r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
+      r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+      r200SetTexBorderColor( t, texObj->_BorderChan );
+   }
+
+   return t;
+}
+
+
+/* Choose the Mesa texture format used to store a texture image.
+ *
+ * The choice is driven by internalFormat; for the generic RGBA/RGB
+ * cases the client's format/type is also checked so that a directly
+ * matching storage format is picked.  When the screen is 4 bytes per
+ * pixel (do32bpt), 32-bit storage is preferred for formats that do not
+ * name a small component size.  All alpha, luminance, luminance-alpha
+ * and intensity formats are stored as al88.
+ *
+ * Returns NULL (after reporting through _mesa_problem) for an
+ * internalFormat that is not handled.
+ */
+static const struct gl_texture_format *
+r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+                           GLenum format, GLenum type )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLboolean do32bpt = ( rmesa->r200Screen->cpp == 4 );
+
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if ( format == GL_BGRA ) {
+        if ( type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
+           return &_mesa_texformat_argb8888;
+        }
+         else if ( type == GL_UNSIGNED_SHORT_4_4_4_4_REV ) {
+            return &_mesa_texformat_argb4444;
+        }
+         else if ( type == GL_UNSIGNED_SHORT_1_5_5_5_REV ) {
+           return &_mesa_texformat_argb1555;
+        }
+      }
+      return do32bpt ? &_mesa_texformat_rgba8888 : &_mesa_texformat_argb4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
+        return &_mesa_texformat_rgb565;
+      }
+      return do32bpt ? &_mesa_texformat_rgba8888 : &_mesa_texformat_rgb565;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return do32bpt ? &_mesa_texformat_rgba8888 : &_mesa_texformat_argb4444;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return &_mesa_texformat_argb4444;
+
+   case GL_RGB5_A1:
+      return &_mesa_texformat_argb1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return do32bpt ? &_mesa_texformat_rgba8888 : &_mesa_texformat_rgb565;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return &_mesa_texformat_rgb565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return &_mesa_texformat_al88;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return &_mesa_texformat_al88;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return &_mesa_texformat_al88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      /* At the moment, glean & conform both fail using the i8 internal
+       * format.
+       */
+      return &_mesa_texformat_al88;
+/*       return &_mesa_texformat_i8; */
+
+   case GL_YCBCR_MESA:
+      /* The client's type selects between the two YCbCr storage
+       * layouts.
+       */
+      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+         type == GL_UNSIGNED_BYTE)
+         return &_mesa_texformat_ycbcr;
+      else
+         return &_mesa_texformat_ycbcr_rev;
+
+   default:
+      _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
+      return NULL;
+   }
+
+   return NULL; /* never get here */
+}
+
+
+/**
+ * Decide whether a texture image can use the application's pixel buffer
+ * directly (client storage) instead of being copied.
+ *
+ * The image qualifies only when client storage is enabled in the unpack
+ * state, no image transfer ops, compression or mipmap generation are in
+ * effect, the internalFormat/format/type combination is one of the few
+ * handled below, the packing has no skips or swaps, the row stride is a
+ * multiple of 64 bytes and r200IsAgpMemory() accepts the buffer.
+ *
+ * \return GL_TRUE after pointing \p texImage at \p pixels (format, data,
+ *         row stride and IsClientData are set); GL_FALSE otherwise, in
+ *         which case \p texImage is left unmodified so the caller can take
+ *         the normal copy path.
+ */
+static GLboolean
+r200ValidateClientStorage( GLcontext *ctx, GLenum target,
+                           GLint internalFormat,
+                           GLint srcWidth, GLint srcHeight,
+                           GLenum format, GLenum type,  const void *pixels,
+                           const struct gl_pixelstore_attrib *packing,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct gl_texture_format *texFormat;
+   int texelBytes;
+   GLint srcRowStride;
+
+   if (0)
+      fprintf(stderr, "intformat %s format %s type %s\n",
+              _mesa_lookup_enum_by_nr( internalFormat ),
+              _mesa_lookup_enum_by_nr( format ),
+              _mesa_lookup_enum_by_nr( type ));
+
+   if (!ctx->Unpack.ClientStorage)
+      return GL_FALSE;
+
+   if (ctx->_ImageTransferState ||
+       texImage->IsCompressed ||
+       texObj->GenerateMipmap)
+      return GL_FALSE;
+
+   /* This list is incomplete, may be different on ppc???
+    *
+    * The format is only remembered locally here; it is committed to
+    * texImage once every remaining check has passed.
+    */
+   switch ( internalFormat ) {
+   case GL_RGBA:
+      if ( format != GL_BGRA || type != GL_UNSIGNED_INT_8_8_8_8_REV )
+         return GL_FALSE;
+      texFormat = &_mesa_texformat_argb8888;
+      texelBytes = 4;
+      break;
+
+   case GL_RGB:
+      if ( format != GL_RGB || type != GL_UNSIGNED_SHORT_5_6_5 )
+         return GL_FALSE;
+      texFormat = &_mesa_texformat_rgb565;
+      texelBytes = 2;
+      break;
+
+   case GL_YCBCR_MESA:
+      if ( format != GL_YCBCR_MESA )
+         return GL_FALSE;
+      if ( type == GL_UNSIGNED_SHORT_8_8_REV_APPLE )
+         texFormat = &_mesa_texformat_ycbcr_rev;
+      else if ( type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+                type == GL_UNSIGNED_BYTE )
+         texFormat = &_mesa_texformat_ycbcr;
+      else
+         return GL_FALSE;
+      texelBytes = 2;
+      break;
+
+   default:
+      return GL_FALSE;
+   }
+
+   /* Could deal with these packing issues, but currently don't:
+    */
+   if (packing->SkipPixels ||
+       packing->SkipRows ||
+       packing->SwapBytes ||
+       packing->LsbFirst) {
+      return GL_FALSE;
+   }
+
+   srcRowStride = _mesa_image_row_stride(packing, srcWidth, format, type);
+
+   if (0)
+      fprintf(stderr, "%s: srcRowStride %d/%x\n",
+              __FUNCTION__, srcRowStride, srcRowStride);
+
+   /* Could check this later in upload, pitch restrictions could be
+    * relaxed, but would need to store the image pitch somewhere,
+    * as packing details might change before image is uploaded:
+    */
+   if (!r200IsAgpMemory( rmesa, pixels, srcHeight * srcRowStride ) ||
+       (srcRowStride & 63))
+      return GL_FALSE;
+
+   /* Have validated that _mesa_transfer_teximage would be a straight
+    * memcpy at this point.  NOTE: future calls to TexSubImage will
+    * overwrite the client data.  This is explicitly mentioned in the
+    * extension spec.
+    */
+   texImage->TexFormat = texFormat;
+   texImage->Data = (void *)pixels;
+   texImage->IsClientData = GL_TRUE;
+   texImage->RowStride = srcRowStride / texelBytes;
+   return GL_TRUE;
+}
+
+
+/**
+ * Driver hook for glTexImage1D: stores the image through the generic Mesa
+ * path and marks the level dirty so that it gets uploaded later.
+ *
+ * Reports GL_OUT_OF_MEMORY and returns if no driver texture object can be
+ * allocated.
+ */
+static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level,
+                              GLint internalFormat,
+                              GLint width, GLint border,
+                              GLenum format, GLenum type, const GLvoid *pixels,
+                              const struct gl_pixelstore_attrib *packing,
+                              struct gl_texture_object *texObj,
+                              struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( t ) {
+      driSwapOutTextureObject( t );
+   }
+   else {
+      t = (driTextureObject *) r200AllocTexObj( texObj );
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+         return;
+      }
+   }
+
+   /* Note, this will call ChooseTextureFormat.
+    *
+    * Use the packing handed in by the caller rather than ctx->Unpack:
+    * the two need not be the same when this hook is invoked by something
+    * other than glTexImage itself, and the TexSubImage hooks already
+    * honour the argument.
+    */
+   _mesa_store_teximage1d(ctx, target, level, internalFormat,
+                          width, border, format, type, pixels,
+                          packing, texObj, texImage);
+
+   t->dirty_images[0] |= (1 << level);
+}
+
+
+/**
+ * Driver hook for glTexSubImage1D: updates part of an existing image via
+ * the generic Mesa path and marks the level dirty for re-upload.
+ */
+static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+                                 GLint xoffset,
+                                 GLsizei width,
+                                 GLenum format, GLenum type,
+                                 const GLvoid *pixels,
+                                 const struct gl_pixelstore_attrib *packing,
+                                 struct gl_texture_object *texObj,
+                                 struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+
+   if ( t == NULL ) {
+      /* Only reachable when assert() is compiled out. */
+      t = (driTextureObject *) r200AllocTexObj( texObj );
+      if ( t == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+         return;
+      }
+   }
+   else {
+      driSwapOutTextureObject( t );
+   }
+
+   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+                             format, type, pixels, packing, texObj,
+                             texImage);
+
+   t->dirty_images[0] |= (1 << level);
+}
+
+
+/**
+ * Driver hook for glTexImage2D; also used for the six cube map faces.
+ *
+ * The image either references the application's buffer directly (client
+ * storage, when r200ValidateClientStorage() accepts it) or is copied
+ * through the generic Mesa path.  In both cases the level is marked dirty
+ * for the affected face.
+ *
+ * Reports GL_OUT_OF_MEMORY and returns if no driver texture object can be
+ * allocated.
+ */
+static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                              GLint internalFormat,
+                              GLint width, GLint height, GLint border,
+                              GLenum format, GLenum type, const GLvoid *pixels,
+                              const struct gl_pixelstore_attrib *packing,
+                              struct gl_texture_object *texObj,
+                              struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   GLuint face;
+
+   /* which cube face or ordinary 2D image */
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+      ASSERT(face < 6);
+      break;
+   default:
+      face = 0;
+   }
+
+   if ( t != NULL ) {
+      driSwapOutTextureObject( t );
+   }
+   else {
+      t = (driTextureObject *) r200AllocTexObj( texObj );
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+         return;
+      }
+   }
+
+   texImage->IsClientData = GL_FALSE;
+
+   if (r200ValidateClientStorage( ctx, target,
+                                  internalFormat,
+                                  width, height,
+                                  format, type, pixels,
+                                  packing, texObj, texImage)) {
+      if (R200_DEBUG & DEBUG_TEXTURE)
+         fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
+   }
+   else {
+      if (R200_DEBUG & DEBUG_TEXTURE)
+         fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
+
+      /* Normal path: copy (to cached memory) and eventually upload
+       * via another copy to agp memory and then a blit...  Could
+       * eliminate one copy by going straight to (permanent) agp.
+       *
+       * Note, this will call r200ChooseTextureFormat.
+       *
+       * The caller's packing is passed on (the same one that was just
+       * validated above) instead of assuming it equals ctx->Unpack.
+       */
+      _mesa_store_teximage2d(ctx, target, level, internalFormat,
+                             width, height, border, format, type, pixels,
+                             packing, texObj, texImage);
+   }
+
+   /* The image changed on either path, so the level must be uploaded
+    * again.  Previously the client storage path relied on
+    * driSwapOutTextureObject() having marked everything dirty, which does
+    * not happen when the texture object was allocated just above.
+    */
+   t->dirty_images[face] |= (1 << level);
+}
+
+
+/**
+ * Driver hook for glTexSubImage2D (2D images and cube map faces): updates
+ * part of an existing image via the generic Mesa path and marks the level
+ * of the affected face dirty for re-upload.
+ */
+static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                                 GLint xoffset, GLint yoffset,
+                                 GLsizei width, GLsizei height,
+                                 GLenum format, GLenum type,
+                                 const GLvoid *pixels,
+                                 const struct gl_pixelstore_attrib *packing,
+                                 struct gl_texture_object *texObj,
+                                 struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   GLuint face = 0;   /* ordinary 2D image unless a cube face is named */
+
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+      ASSERT(face < 6);
+      break;
+   default:
+      break;
+   }
+
+   assert( t ); /* this _should_ be true */
+
+   if ( t == NULL ) {
+      /* Only reachable when assert() is compiled out. */
+      t = (driTextureObject *) r200AllocTexObj( texObj );
+      if ( t == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+         return;
+      }
+   }
+   else {
+      driSwapOutTextureObject( t );
+   }
+
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+                             height, format, type, pixels, packing, texObj,
+                             texImage);
+
+   t->dirty_images[face] |= (1 << level);
+}
+
+
+#if ENABLE_HW_3D_TEXTURE
+/**
+ * Driver hook for glTexImage3D (only built with ENABLE_HW_3D_TEXTURE):
+ * stores the image through the generic Mesa path and marks the level
+ * dirty.  The client storage path is compiled out for 3D images.
+ *
+ * Reports GL_OUT_OF_MEMORY and returns if no driver texture object can be
+ * allocated.
+ */
+static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level,
+                            GLint internalFormat,
+                            GLint width, GLint height, GLint depth,
+                            GLint border,
+                            GLenum format, GLenum type, const GLvoid *pixels,
+                            const struct gl_pixelstore_attrib *packing,
+                            struct gl_texture_object *texObj,
+                            struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( t ) {
+      driSwapOutTextureObject( t );
+   }
+   else {
+      /* Cast as in the 1D/2D hooks. */
+      t = (driTextureObject *) r200AllocTexObj( texObj );
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
+         return;
+      }
+   }
+
+   texImage->IsClientData = GL_FALSE;
+
+#if 0
+   if (r200ValidateClientStorage( ctx, target, 
+                                 internalFormat, 
+                                 width, height, 
+                                 format, type, pixels, 
+                                 packing, texObj, texImage)) {
+      if (R200_DEBUG & DEBUG_TEXTURE)
+        fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
+   }
+   else
+#endif
+   {
+      if (R200_DEBUG & DEBUG_TEXTURE)
+         fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
+
+      /* Normal path: copy (to cached memory) and eventually upload
+       * via another copy to agp memory and then a blit...  Could
+       * eliminate one copy by going straight to (permanent) agp.
+       *
+       * Note, this will call r200ChooseTextureFormat.
+       *
+       * The caller's packing is passed on instead of assuming it equals
+       * ctx->Unpack.
+       */
+      _mesa_store_teximage3d(ctx, target, level, internalFormat,
+                             width, height, depth, border,
+                             format, type, pixels,
+                             packing, texObj, texImage);
+
+      t->dirty_images[0] |= (1 << level);
+   }
+}
+#endif
+
+
+#if ENABLE_HW_3D_TEXTURE
+/**
+ * Driver hook for glTexSubImage3D (only built with ENABLE_HW_3D_TEXTURE):
+ * updates part of an existing 3D image via the generic Mesa path and marks
+ * the level dirty for re-upload.
+ */
+static void
+r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
+                   GLint xoffset, GLint yoffset, GLint zoffset,
+                   GLsizei width, GLsizei height, GLsizei depth,
+                   GLenum format, GLenum type,
+                   const GLvoid *pixels,
+                   const struct gl_pixelstore_attrib *packing,
+                   struct gl_texture_object *texObj,
+                   struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
+   assert( t ); /* this _should_ be true */
+
+   if ( t == NULL ) {
+      /* Only reachable when assert() is compiled out. */
+      t = r200AllocTexObj(texObj);
+      if ( t == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
+         return;
+      }
+      texObj->DriverData = t;
+   }
+   else {
+      driSwapOutTextureObject( t );
+   }
+
+   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
+                             width, height, depth,
+                             format, type, pixels, packing, texObj, texImage);
+
+   t->dirty_images[0] |= (1 << level);
+}
+#endif
+
+
+
+/**
+ * Driver hook for glTexEnv: mirrors the environment colour and the LOD
+ * bias of the current texture unit into the hardware state.  All other
+ * pnames are ignored here.
+ */
+static void r200TexEnv( GLcontext *ctx, GLenum target,
+                         GLenum pname, const GLfloat *param )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint unit = ctx->Texture.CurrentUnit;
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+   if ( R200_DEBUG & DEBUG_STATE ) {
+      fprintf( stderr, "%s( %s )\n",
+               __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   /* This is incorrect: Need to maintain this data for each of
+    * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch
+    * between them according to _ReallyEnabled.
+    */
+   switch ( pname ) {
+   case GL_TEXTURE_ENV_COLOR: {
+      GLubyte c[4];
+      GLuint envColor;
+      UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
+      envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] );
+      /* Only emit a state change when the packed colour really differs. */
+      if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) {
+         R200_STATECHANGE( rmesa, tf );
+         rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor;
+      }
+      break;
+   }
+
+   case GL_TEXTURE_LOD_BIAS_EXT: {
+      GLfloat bias;
+      GLuint b;
+      const int fixed_one = 0x8000000;
+
+      /* The R200's LOD bias is a signed 2's complement value with a
+       * range of -16.0 <= bias < 16.0. 
+       *
+       * NOTE: Add a small bias to the bias for conform mipsel.c test.
+       */
+      bias = *param + .01;
+      bias = CLAMP( bias, -16.0, 16.0 );
+
+      if ( bias >= 16.0 ) {
+         /* 16.0 * fixed_one is 2^31, which does not fit in an int; the
+          * conversion would be undefined and typically wraps to the most
+          * negative bias.  Saturate to the largest positive value the
+          * field can hold instead.
+          */
+         b = 0x7fffffff & R200_LOD_BIAS_MASK;
+      }
+      else {
+         b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK;
+      }
+
+      if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] & R200_LOD_BIAS_MASK) != b ) {
+         R200_STATECHANGE( rmesa, tex[unit] );
+         rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] &= ~R200_LOD_BIAS_MASK;
+         rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] |= b;
+      }
+      break;
+   }
+
+   default:
+      return;
+   }
+}
+
+
+/**
+ * Changes variables and flags for a state update, which will happen at the
+ * next UpdateTextureState
+ *
+ * Only GL_TEXTURE_1D and GL_TEXTURE_2D objects are handled.  The new values
+ * are read from \p texObj (not from \p params) and pushed into the driver
+ * texture object, which is then marked dirty.  Unknown pnames are ignored.
+ */
+
+static void r200TexParameter( GLcontext *ctx, GLenum target,
+                                struct gl_texture_object *texObj,
+                                GLenum pname, const GLfloat *params )
+{
+   r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData;
+
+   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+               _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   if ( ( target != GL_TEXTURE_2D ) &&
+        ( target != GL_TEXTURE_1D ) )
+      return;
+
+   /* No driver object yet (e.g. the allocation attempted at bind time
+    * failed): there is nothing to update.  NOTE(review): this assumes
+    * r200AllocTexObj() picks the current values up from texObj when the
+    * object is eventually created - confirm.
+    */
+   if ( t == NULL )
+      return;
+
+   switch ( pname ) {
+   case GL_TEXTURE_MIN_FILTER:
+   case GL_TEXTURE_MAG_FILTER:
+   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+      r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
+      r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+   case GL_TEXTURE_WRAP_R:
+      r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR );
+      break;
+
+   case GL_TEXTURE_BORDER_COLOR:
+      r200SetTexBorderColor( t, texObj->_BorderChan );
+      break;
+
+   case GL_TEXTURE_BASE_LEVEL:
+   case GL_TEXTURE_MAX_LEVEL:
+   case GL_TEXTURE_MIN_LOD:
+   case GL_TEXTURE_MAX_LOD:
+      /* This isn't the most efficient solution but there doesn't appear to
+       * be a nice alternative.  Since there's no LOD clamping,
+       * we just have to rely on loading the right subset of mipmap levels
+       * to simulate a clamped LOD.
+       */
+      driSwapOutTextureObject( (driTextureObject *) t );
+      break;
+
+   default:
+      return;
+   }
+
+   /* Mark this texobj as dirty (one bit per tex unit)
+    */
+   t->dirty_state = TEX_ALL;
+}
+
+
+
+/**
+ * Driver hook for glBindTexture: makes sure 1D and 2D texture objects have
+ * a driver-side object attached.  Other targets are left alone here.
+ */
+static void r200BindTexture( GLcontext *ctx, GLenum target,
+                               struct gl_texture_object *texObj )
+{
+   const GLboolean handled =
+      ( target == GL_TEXTURE_2D || target == GL_TEXTURE_1D );
+
+   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, texObj,
+               ctx->Texture.CurrentUnit );
+   }
+
+   /* The allocation result is not checked here. */
+   if ( handled && texObj->DriverData == NULL )
+      r200AllocTexObj( texObj );
+}
+
+/**
+ * Driver hook called when a texture object is deleted: destroys the
+ * driver-side object, if there is one, after R200_FIREVERTICES.
+ */
+static void r200DeleteTexture( GLcontext *ctx,
+                                 struct gl_texture_object *texObj )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, texObj,
+               _mesa_lookup_enum_by_nr( texObj->Target ) );
+   }
+
+   /* Nothing to do for objects that never got driver data. */
+   if ( t == NULL )
+      return;
+
+   if ( rmesa ) {
+      R200_FIREVERTICES( rmesa );
+   }
+
+   driDestroyTextureObject( t );
+}
+
+/* Driver hook for glTexGen.
+ *
+ * Need:
+ *  - Same GEN_MODE for all active bits
+ *  - Same EyePlane/ObjPlane for all active bits when using Eye/Obj
+ *  - STRQ presumably all supported (matrix means incoming R values
+ *    can end up in STQ, this has implications for vertex support,
+ *    presumably ok if maos is used, though?)
+ *
+ * Basically impossible to do this on the fly - just collect some
+ * basic info & do the checks from ValidateState().
+ *
+ * All that happens here is that the current texture unit is flagged so
+ * its texgen setup gets rechecked; coord, pname and params are unused.
+ */
+static void r200TexGen( GLcontext *ctx,
+                          GLenum coord,
+                          GLenum pname,
+                          const GLfloat *params )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
+}
+
+
+void r200InitTextureFuncs( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+
+   ctx->Driver.ChooseTextureFormat     = r200ChooseTextureFormat;
+   ctx->Driver.TexImage1D              = r200TexImage1D;
+   ctx->Driver.TexImage2D              = r200TexImage2D;
+#if ENABLE_HW_3D_TEXTURE
+   ctx->Driver.TexImage3D              = r200TexImage3D;
+#else
+   ctx->Driver.TexImage3D              = _mesa_store_teximage3d;
+#endif
+   ctx->Driver.TexSubImage1D           = r200TexSubImage1D;
+   ctx->Driver.TexSubImage2D           = r200TexSubImage2D;
+#if ENABLE_HW_3D_TEXTURE
+   ctx->Driver.TexSubImage3D           = r200TexSubImage3D;
+#else
+   ctx->Driver.TexSubImage3D           = _mesa_store_texsubimage3d;
+#endif
+   ctx->Driver.CopyTexImage1D          = _swrast_copy_teximage1d;
+   ctx->Driver.CopyTexImage2D          = _swrast_copy_teximage2d;
+   ctx->Driver.CopyTexSubImage1D       = _swrast_copy_texsubimage1d;
+   ctx->Driver.CopyTexSubImage2D       = _swrast_copy_texsubimage2d;
+   ctx->Driver.CopyTexSubImage3D       = _swrast_copy_texsubimage3d;
+   ctx->Driver.TestProxyTexImage       = _mesa_test_proxy_teximage;
+
+   ctx->Driver.BindTexture             = r200BindTexture;
+   ctx->Driver.CreateTexture           = NULL; /* FIXME: Is this used??? */
+   ctx->Driver.DeleteTexture           = r200DeleteTexture;
+   ctx->Driver.IsTextureResident       = driIsTextureResident;
+   ctx->Driver.PrioritizeTexture       = NULL;
+   ctx->Driver.ActiveTexture           = NULL;
+   ctx->Driver.UpdateTexturePalette    = NULL;
+
+   ctx->Driver.TexEnv                  = r200TexEnv;
+   ctx->Driver.TexParameter            = r200TexParameter;
+   ctx->Driver.TexGen                   = r200TexGen;
+
+   driInitTextureObjects( ctx, & rmesa->swapped,
+                         DRI_TEXMGR_DO_TEXTURE_1D
+                         | DRI_TEXMGR_DO_TEXTURE_2D );
+}
diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
new file mode 100644 (file)
index 0000000..9013de6
--- /dev/null
@@ -0,0 +1,51 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_TEX_H__
+#define __R200_TEX_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void r200UpdateTextureState( GLcontext *ctx );
+
+extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face );
+
+/* Clears per-unit driver state that still refers to texture object t. */
+extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t );
+
+/* Installs the r200 texture callbacks into ctx->Driver. */
+extern void r200InitTextureFuncs( GLcontext *ctx );
+
+#endif /* GLX_DIRECT_RENDERING */
+#endif /* __R200_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c
new file mode 100644 (file)
index 0000000..3477708
--- /dev/null
@@ -0,0 +1,505 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) Tungsten Graphics 2002.  All Rights Reserved.  
+The Weather Channel, Inc. funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86
+license. This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation on the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+#include <errno.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "colormac.h"
+#include "macros.h"
+#include "simple_list.h"
+#include "radeon_reg.h" /* gets definition for usleep */
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_swtcl.h"
+#include "r200_tex.h"
+
+#include <unistd.h>  /* for usleep() */
+
+
+/**
+ * Destroy any device-dependent state associated with the texture.  This may
+ * include NULLing out hardware state that points to the texture.
+ *
+ * Every texture unit currently referring to \p t has its texobj pointer
+ * cleared and its tex/cube state atoms taken off their lists.  Nothing is
+ * done when \p rmesa is NULL.
+ */
+void
+r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t )
+{
+   unsigned unit;
+
+   if ( R200_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, t, t->base.tObj );
+   }
+
+   if ( rmesa == NULL )
+      return;
+
+   for ( unit = 0 ; unit < rmesa->glCtx->Const.MaxTextureUnits ; unit++ ) {
+      if ( rmesa->state.texture.unit[unit].texobj != t )
+         continue;
+
+      rmesa->state.texture.unit[unit].texobj = NULL;
+
+      remove_from_list( &rmesa->hw.tex[unit] );
+      make_empty_list( &rmesa->hw.tex[unit] );
+
+      remove_from_list( &rmesa->hw.cube[unit] );
+      make_empty_list( &rmesa->hw.cube[unit] );
+   }
+}
+
+
+/* ------------------------------------------------------------
+ * Texture image conversions
+ */
+
+
+/**
+ * Blit a client-storage texture image from its AGP location (as reported
+ * by r200AgpOffsetFromVirtual()) into the texture's buffer, at the
+ * position recorded for hardware level \p hwlevel.
+ *
+ * The incoming \p width and \p height are ignored; the full image size is
+ * used.  Images whose texel size is not 1, 2 or 4 bytes are skipped.
+ */
+static void r200UploadAGPClientSubImage( r200ContextPtr rmesa,
+                                         r200TexObjPtr t,
+                                         struct gl_texture_image *texImage,
+                                         GLint hwlevel,
+                                         GLint x, GLint y,
+                                         GLint width, GLint height )
+{
+   const struct gl_texture_format *texFormat = texImage->TexFormat;
+   GLuint pitch;
+   int blit_format;
+   int srcOffset;
+
+   /* The blitter format only depends on the texel size. */
+   switch ( texFormat->TexelBytes ) {
+   case 1:
+      blit_format = R200_CP_COLOR_FORMAT_CI8;
+      break;
+   case 2:
+      blit_format = R200_CP_COLOR_FORMAT_RGB565;
+      break;
+   case 4:
+      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+      break;
+   default:
+      return;
+   }
+
+   /*
+    * XXX it appears that we always upload the full image, not a subimage.
+    * I.e. x==0, y==0, width=texWidth, height=texHeight.  If this is ever
+    * changed, the src pitch will have to change.
+    *
+    * Source and destination both use the pitch of the base image.
+    */
+   pitch = t->image[0][0].width * texFormat->TexelBytes;
+
+   t->image[0][hwlevel].data = texImage->Data;
+   srcOffset = r200AgpOffsetFromVirtual( rmesa, texImage->Data );
+
+   assert( srcOffset != ~0 );
+
+   /* Don't currently need to cope with small pitches?
+    */
+   width = texImage->Width;
+   height = texImage->Height;
+
+   r200EmitWait( rmesa, RADEON_WAIT_3D );
+
+   r200EmitBlit( rmesa, blit_format,
+                 pitch, srcOffset,
+                 pitch, t->bufAddr,
+                 x, y,
+                 t->image[0][hwlevel].x + x,
+                 t->image[0][hwlevel].y + y,
+                 width, height );
+
+   r200EmitWait( rmesa, RADEON_WAIT_2D );
+}
+
+/**
+ * Upload a complete image to the texture's buffer using a pitch of
+ * t->pp_txpitch + 32 bytes.  The incoming \p x, \p y, \p width and
+ * \p height are not honoured; the full image is always transferred.
+ *
+ * Three paths exist:
+ *  - client data with prefer_agp_client_texturing set: no copy, the
+ *    texture offset/pitch are pointed at the client buffer;
+ *  - other client data: a single blit from the client buffer;
+ *  - everything else: the image is copied through DMA regions in chunks of
+ *    at most RADEON_BUFFER_SIZE bytes, each followed by a blit.
+ *
+ * Images whose texel size is not 1, 2 or 4 bytes are skipped.
+ */
+static void r200UploadRectSubImage( r200ContextPtr rmesa,
+                                    r200TexObjPtr t,
+                                    struct gl_texture_image *texImage,
+                                    GLint x, GLint y,
+                                    GLint width, GLint height )
+{
+   const struct gl_texture_format *texFormat = texImage->TexFormat;
+   int blit_format, dstPitch, done;
+
+   switch ( texFormat->TexelBytes ) {
+   case 1:
+      blit_format = R200_CP_COLOR_FORMAT_CI8;
+      break;
+   case 2:
+      blit_format = R200_CP_COLOR_FORMAT_RGB565;
+      break;
+   case 4:
+      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+      break;
+   default:
+      return;
+   }
+
+   t->image[0][0].data = texImage->Data;
+
+   /* Currently don't need to cope with small pitches.
+    */
+   width = texImage->Width;
+   height = texImage->Height;
+   dstPitch = t->pp_txpitch + 32;
+
+   if (rmesa->prefer_agp_client_texturing && texImage->IsClientData) {
+      /* In this case, could also use agp texturing.  This is
+       * currently disabled, but has been tested & works.
+       */
+      t->pp_txoffset = r200AgpOffsetFromVirtual( rmesa, texImage->Data );
+      t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32;
+
+      if (R200_DEBUG & DEBUG_TEXTURE)
+         fprintf(stderr,
+                 "Using agp texturing for rectangular client texture\n");
+
+      /* Release FB memory allocated for this image:
+       */
+      /* FIXME This may not be correct as driSwapOutTextureObject sets
+       * FIXME dirty_images.  It may be fine, though.
+       */
+      if ( t->base.memBlock ) {
+         driSwapOutTextureObject( (driTextureObject *) t );
+      }
+   }
+   else if (texImage->IsClientData) {
+      /* Data already in agp memory, with usable pitch.
+       */
+      GLuint srcPitch;
+      srcPitch = texImage->RowStride * texFormat->TexelBytes;
+      r200EmitBlit( rmesa,
+                    blit_format,
+                    srcPitch,
+                    r200AgpOffsetFromVirtual( rmesa, texImage->Data ),
+                    dstPitch, t->bufAddr,
+                    0, 0,
+                    0, 0,
+                    width, height );
+   }
+   else {
+      /* Data not in agp memory, or bad pitch.
+       */
+      const int src_pitch = texImage->RowStride * texFormat->TexelBytes;
+      /* Number of destination rows that fit in one DMA buffer. */
+      const int maxLines = (dstPitch > 0) ? RADEON_BUFFER_SIZE / dstPitch : 0;
+
+      /* With no complete row per buffer the loop below would never make
+       * progress (and a zero pitch would divide by zero), so bail out.
+       */
+      if ( maxLines < 1 ) {
+         _mesa_problem(NULL, "bad texture pitch in %s", __FUNCTION__);
+         return;
+      }
+
+      for (done = 0; done < height ; ) {
+         struct r200_dma_region region;
+         int lines = MIN2( height - done, maxLines );
+         char *tex = (char *)texImage->Data + done * src_pitch;
+
+         memset(&region, 0, sizeof(region));
+         r200AllocDmaRegion( rmesa, &region, lines * dstPitch, 64 );
+
+         /* Copy texdata to dma:
+          */
+         if (0)
+            fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
+                    __FUNCTION__, src_pitch, dstPitch);
+
+         if (src_pitch == dstPitch) {
+            memcpy( region.address, tex, lines * src_pitch );
+         }
+         else {
+            /* Repack row by row onto the destination pitch. */
+            char *buf = region.address;
+            int i;
+            for (i = 0 ; i < lines ; i++) {
+               memcpy( buf, tex, src_pitch );
+               buf += dstPitch;
+               tex += src_pitch;
+            }
+         }
+
+         r200EmitWait( rmesa, RADEON_WAIT_3D );
+
+         /* Blit to framebuffer
+          */
+         r200EmitBlit( rmesa,
+                       blit_format,
+                       dstPitch, GET_START( &region ),
+                       dstPitch, t->bufAddr,
+                       0, 0,
+                       0, done,
+                       width, lines );
+
+         r200EmitWait( rmesa, RADEON_WAIT_2D );
+
+         r200ReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
+         done += lines;
+      }
+   }
+}
+
+
+/**
+ * Upload one mipmap level of one face of texture \a t to the texture
+ * memory whose offset is stored in \c t->bufAddr.
+ *
+ * Rectangle textures are handed to r200UploadRectSubImage() and images
+ * flagged \c IsClientData to r200UploadAGPClientSubImage().  Everything
+ * else is uploaded with the DRM_RADEON_TEXTURE ioctl, using the blit
+ * rectangle stored in \c t->image[face][hwlevel].
+ *
+ * \param rmesa   Context pointer
+ * \param t       Driver texture object
+ * \param hwlevel Hardware mipmap level; the GL level is
+ *                \c hwlevel + \c t->base.firstLevel
+ * \param x       Sub-rectangle origin (only forwarded to the rectangle
+ *                and AGP client upload helpers)
+ * \param y       See \a x
+ * \param width   Sub-rectangle size (only forwarded, see \a x)
+ * \param height  See \a width
+ * \param face    Cube map face, 0..5.  Zero for non-cube maps.
+ *
+ * If the ioctl fails with anything other than EAGAIN the process is
+ * terminated with exit(1).
+ */
+static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
+                           GLint hwlevel,
+                           GLint x, GLint y, GLint width, GLint height,
+                           GLuint face )
+{
+   struct gl_texture_image *texImage = NULL;
+   GLuint offset;
+   GLint imageWidth, imageHeight;
+   GLint ret;
+   drmRadeonTexture tex;
+   drmRadeonTexImage tmp;
+   const int level = hwlevel + t->base.firstLevel;
+
+   if ( R200_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
+              __FUNCTION__, t, t->base.tObj, level, width, height, face );
+   }
+
+   ASSERT(face < 6);
+
+   /* Ensure we have a valid texture to upload */
+   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
+      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+      return;
+   }
+
+   /* Pick the image array that belongs to the requested face.  An
+    * out-of-range face leaves texImage NULL and is rejected below.
+    */
+   switch (face) {
+   case 0:
+      texImage = t->base.tObj->Image[level];
+      break;
+   case 1:
+      texImage = t->base.tObj->NegX[level];
+      break;
+   case 2:
+      texImage = t->base.tObj->PosY[level];
+      break;
+   case 3:
+      texImage = t->base.tObj->NegY[level];
+      break;
+   case 4:
+      texImage = t->base.tObj->PosZ[level];
+      break;
+   case 5:
+      texImage = t->base.tObj->NegZ[level];
+      break;
+   }
+
+   if ( !texImage ) {
+      if ( R200_DEBUG & DEBUG_TEXTURE )
+        fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
+      return;
+   }
+   if ( !texImage->Data ) {
+      if ( R200_DEBUG & DEBUG_TEXTURE )
+        fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
+      return;
+   }
+
+
+   /* Rectangle textures and AGP client storage have dedicated upload
+    * helpers; only ordinary images fall through to the ioctl below.
+    */
+   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+      assert(level == 0);
+      assert(hwlevel == 0);
+      if ( R200_DEBUG & DEBUG_TEXTURE )
+        fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
+      r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height );
+      return;
+   }
+   else if (texImage->IsClientData) {
+      if ( R200_DEBUG & DEBUG_TEXTURE )
+        fprintf( stderr, "%s: image data is in agp client storage\n",
+                 __FUNCTION__);
+      r200UploadAGPClientSubImage( rmesa, t, texImage, hwlevel,
+                                  x, y, width, height );
+      return;
+   }
+   else if ( R200_DEBUG & DEBUG_TEXTURE )
+      fprintf( stderr, "%s: image data is in normal memory\n",
+              __FUNCTION__);
+
+
+   imageWidth = texImage->Width;
+   imageHeight = texImage->Height;
+
+   offset = t->bufAddr;
+
+   if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+      GLint imageX = 0;
+      GLint imageY = 0;
+      GLint blitX = t->image[face][hwlevel].x;
+      GLint blitY = t->image[face][hwlevel].y;
+      GLint blitWidth = t->image[face][hwlevel].width;
+      GLint blitHeight = t->image[face][hwlevel].height;
+      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
+              imageWidth, imageHeight, imageX, imageY );
+      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
+              blitWidth, blitHeight, blitX, blitY );
+      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
+              (GLuint)offset, hwlevel, level );
+   }
+
+   t->image[face][hwlevel].data = texImage->Data;
+
+   /* Init the DRM_RADEON_TEXTURE command / drmRadeonTexture struct.
+    * NOTE: we always use a 1KB-wide blit and I8 texture format.
+    * We used to use 1, 2 and 4-byte texels and used to use the texture
+    * width to dictate the blit width - but that won't work for compressed
+    * textures. (Brian)
+    */
+   tex.offset = offset;
+   tex.pitch = BLIT_WIDTH_BYTES / 64;
+   tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
+   if (texImage->TexFormat->TexelBytes) {
+      tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */
+      tex.height = imageHeight;
+   }
+   else {
+      /* TexelBytes == 0 is treated as "compressed" here */
+      tex.width = imageWidth; /* compressed */
+      tex.height = imageHeight;
+      if (tex.height < 4)
+         tex.height = 4;
+   }
+   tex.image = &tmp;
+
+   /* copy (x,y,width,height,data) */
+   memcpy( &tmp, &t->image[face][hwlevel], sizeof(drmRadeonTexImage) );
+
+   LOCK_HARDWARE( rmesa );
+   do {
+      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
+                                 &tex, sizeof(drmRadeonTexture) );
+      if (ret) {
+        if (R200_DEBUG & DEBUG_IOCTL)
+           fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
+        usleep(1);
+      }
+      /* drmCommandWriteRead() reports failure as a negated errno value.
+       * Test that value rather than errno itself: the fprintf() and
+       * usleep() calls above are allowed to overwrite errno, which could
+       * end the retry loop early or keep it spinning on a fatal error.
+       */
+   } while ( ret == -EAGAIN );
+
+   UNLOCK_HARDWARE( rmesa );
+
+   if ( ret ) {
+      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
+      fprintf( stderr, "   offset=0x%08x\n",
+              offset );
+      fprintf( stderr, "   image width=%d height=%d\n",
+              imageWidth, imageHeight );
+      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
+              t->image[face][hwlevel].width, t->image[face][hwlevel].height,
+              t->image[face][hwlevel].data );
+      exit( 1 );
+   }
+}
+
+
+/**
+ * Upload the texture images associated with texture \a t.  This might
+ * require the allocation of texture memory.
+ *
+ * \param rmesa Context pointer
+ * \param t Texture to be uploaded
+ * \param face Cube map face to be uploaded.  Zero for non-cube maps.
+ *
+ * \return 0 on success or when there is nothing to upload (\a t is NULL
+ *         or its total size is zero); -1 when driAllocateTexture() fails
+ *         to place the texture in any heap.
+ */
+
+int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face )
+{
+   int numLevels;
+
+   /* Reject a NULL texture before anything dereferences it.  The level
+    * count and the debug output below both read through t.
+    */
+   if ( !t )
+      return 0;
+
+   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+
+   if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+              rmesa->glCtx, t->base.tObj, t->base.totalSize,
+              t->base.firstLevel, t->base.lastLevel );
+   }
+
+   if ( t->base.totalSize == 0 )
+      return 0;
+
+   if (R200_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+      r200Finish( rmesa->glCtx );
+   }
+
+   LOCK_HARDWARE( rmesa );
+
+   /* No memory block yet: allocate one and derive the hardware offset. */
+   if ( t->base.memBlock == NULL ) {
+      int heap;
+
+      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
+                                (driTextureObject *) t );
+      if ( heap == -1 ) {
+        UNLOCK_HARDWARE( rmesa );
+        return -1;
+      }
+
+      /* Set the base offset of the texture image */
+      t->bufAddr = rmesa->r200Screen->texOffset[heap]
+          + t->base.memBlock->ofs;
+      t->pp_txoffset = t->bufAddr;
+
+
+      /* Mark this texobj as dirty on all units:
+       */
+      t->dirty_state = TEX_ALL;
+   }
+
+   /* Let the world know we've used this memory recently.
+    */
+   driUpdateTextureLRU( (driTextureObject *) t );
+   UNLOCK_HARDWARE( rmesa );
+
+   /* Upload any images that are new.  dirty_images[] is indexed by GL
+    * level, while uploadSubImage() and t->image[][] use the hardware
+    * level (GL level minus firstLevel).
+    */
+   if (t->base.dirty_images[face]) {
+      int i;
+      for ( i = 0 ; i < numLevels ; i++ ) {
+         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
+            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
+                           t->image[face][i].height, face );
+         }
+      }
+      t->base.dirty_images[face] = 0;
+   }
+
+
+   if (R200_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+      r200Finish( rmesa->glCtx );
+   }
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
new file mode 100644 (file)
index 0000000..e1dc206
--- /dev/null
@@ -0,0 +1,1824 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "macros.h"
+#include "texformat.h"
+#include "enums.h"
+
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_swtcl.h"
+#include "r200_tex.h"
+#include "r200_tcl.h"
+
+
+/* Aliases so that the token-pasting macros below can build
+ * R200_TXFORMAT_<name> from the Mesa format name.
+ */
+#define R200_TXFORMAT_AL88      R200_TXFORMAT_AI88
+#define R200_TXFORMAT_YCBCR     R200_TXFORMAT_YVYU422
+#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422
+
+/* Table-entry builders.  Each expands to a designated initializer for
+ * slot MESA_FORMAT_<f> holding { format bits, filter bits }.
+ */
+#define _COLOR(f) \
+    [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 }
+#define _ALPHA(f) \
+    [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _YUV(f) \
+    [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB }
+#define _INVALID(f) \
+    [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
+
+/* True when f indexes a tx_table entry (MESA_FORMAT_YCBCR_REV is the last
+ * one listed) that is not marked with the 0xffffffff "invalid" sentinel.
+ * Note that f is evaluated twice.
+ */
+#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_YCBCR_REV) \
+                            && (tx_table[f].format != 0xffffffff) )
+
+/* Maps a Mesa texture format to the bits OR'ed into pp_txformat (format)
+ * and pp_txfilter (filter) by r200SetTexImages().
+ */
+static const struct {
+   GLuint format, filter;
+}
+tx_table[] =
+{
+   _ALPHA(RGBA8888),
+   _ALPHA(ARGB8888),
+   _INVALID(RGB888),
+   _COLOR(RGB565),
+   _ALPHA(ARGB4444),
+   _ALPHA(ARGB1555),
+   _ALPHA(AL88),
+   _INVALID(A8),
+   _INVALID(L8),
+   _COLOR(I8),
+   _INVALID(CI8),
+   _YUV(YCBCR),
+   _YUV(YCBCR_REV),
+};
+
+/* The builders are only meant for the table above; drop all of them,
+ * including _YUV, so they cannot leak into the rest of the file.
+ */
+#undef _COLOR
+#undef _ALPHA
+#undef _YUV
+#undef _INVALID
+
+/**
+ * This function computes the number of bytes of storage needed for
+ * the given texture object (all mipmap levels, all cube faces).
+ * The \c image[face][level].x/y/width/height parameters for upload/blitting
+ * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
+ * too.
+ * 
+ * \param rmesa Context pointer
+ * \param tObj GL texture object whose images are to be posted to
+ *                 hardware state.
+ */
+static void r200SetTexImages( r200ContextPtr rmesa,
+                             struct gl_texture_object *tObj )
+{
+   r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
+   const struct gl_texture_image *baseImage = tObj->Image[tObj->BaseLevel];
+   GLint curOffset;
+   GLint i;
+   GLint firstLevel=0, lastLevel=0, numLevels;
+   GLint log2Width, log2Height, log2Depth;
+
+   /* Set the hardware texture format
+    */
+
+   t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
+                      R200_TXFORMAT_ALPHA_IN_MAP);
+   t->pp_txfilter &= ~R200_YUV_TO_RGB;
+
+   if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
+      t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
+      t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
+   }
+   else {
+      _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+      return;
+   }
+
+
+
+   /* Compute which mipmap levels we really want to send to the hardware.
+    * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+    * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+    * Yes, this looks overly complicated, but it's all needed.
+    */
+   if (R200_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr,  
+             "%s: BaseLevel %d MinLod %f MaxLod %f MaxLevel %d\n",  
+             __FUNCTION__,
+             tObj->BaseLevel, tObj->MinLod, tObj->MaxLod, 
+             tObj->MaxLevel); 
+
+
+   switch (tObj->Target) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_CUBE_MAP:
+      firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
+      firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+      lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
+      lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+      lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
+      lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+      lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+      log2Width = tObj->Image[firstLevel]->WidthLog2;
+      log2Height = tObj->Image[firstLevel]->HeightLog2;
+      log2Depth = 0;
+      break;
+   case GL_TEXTURE_3D:
+      firstLevel = tObj->BaseLevel;
+      lastLevel = tObj->BaseLevel;
+      log2Width = tObj->Image[firstLevel]->WidthLog2;
+      log2Height = tObj->Image[firstLevel]->HeightLog2;
+      log2Depth = tObj->Image[firstLevel]->DepthLog2;
+      break;
+   case GL_TEXTURE_RECTANGLE_NV:
+      firstLevel = lastLevel = 0;
+      log2Width = log2Height = 1; /* ? */
+      log2Depth = 0;
+      break;
+   default:
+      return;
+   }
+
+   /* save these values */
+   t->base.firstLevel = firstLevel;
+   t->base.lastLevel = lastLevel;
+
+   numLevels = lastLevel - firstLevel + 1;
+
+   if (R200_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, 
+             "%s: firstLevel %d last Level %d w,h: %d,%d log(w,h) %d,%d\n",  
+             __FUNCTION__, firstLevel, lastLevel,
+             tObj->Image[firstLevel]->Width,
+             tObj->Image[firstLevel]->Height,
+             tObj->Image[firstLevel]->WidthLog2,
+             tObj->Image[firstLevel]->HeightLog2);
+
+
+   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+
+   /* Calculate mipmap offsets and dimensions for blitting (uploading)
+    * The idea is that we lay out the mipmap levels within a block of
+    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+    */
+   curOffset = 0;
+
+   for (i = 0; i < numLevels; i++) {
+      const struct gl_texture_image *texImage;
+      GLuint size;
+
+      texImage = tObj->Image[i + firstLevel];
+      if ( !texImage )
+        break;
+
+      /* find image size in bytes */
+      if (texImage->IsCompressed) {
+         size = texImage->CompressedSize;
+      }
+      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+         size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
+                 & ~63) * texImage->Height;
+      }
+      else {
+         int w = texImage->Width * texImage->TexFormat->TexelBytes;
+         if (w < 32)
+            w = 32;
+         size = w * texImage->Height * texImage->Depth;
+      }
+      assert(size > 0);
+
+      if (curOffset & 0x1f) {
+         /* align to 32-byte offset */
+         curOffset = (curOffset + 0x1f) & ~0x1f;
+      }
+
+      t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+      t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+      t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
+      t->image[0][i].height = size / t->image[0][i].width;
+
+#if 0
+      /* for debugging only and only  applicable to non-rectangle targets */
+      assert(size % t->image[0][i].width == 0);
+      assert(t->image[0][i].x == 0
+             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
+#endif
+
+      if (0)
+         fprintf(stderr,
+                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+                 i, texImage->Width, texImage->Height,
+                 t->image[0][i].x, t->image[0][i].y,
+                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
+
+      curOffset += size;
+
+   }
+
+   /* Align the total size of texture memory block.
+    */
+   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+
+   /* Setup remaining cube face blits, if needed */
+   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+      /* Round totalSize up to multiple of BLIT_WIDTH_BYTES */
+      const GLuint faceSize = (t->base.totalSize + BLIT_WIDTH_BYTES - 1)
+                              & ~(BLIT_WIDTH_BYTES-1);
+      const GLuint lines = faceSize / BLIT_WIDTH_BYTES;
+      GLuint face;
+      /* reuse face 0 x/y/width/height - just adjust y */
+      for (face = 1; face < 6; face++) {
+         for (i = 0; i < numLevels; i++) {
+            t->image[face][i].x =  t->image[0][i].x;
+            t->image[face][i].y =  t->image[0][i].y + face * lines;
+            t->image[face][i].width  = t->image[0][i].width;
+            t->image[face][i].height = t->image[0][i].height;
+         }
+      }
+      t->base.totalSize = 6 * faceSize; /* total texmem needed */
+   }
+
+
+   /* Hardware state:
+    */
+   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
+   t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
+
+   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
+                      R200_TXFORMAT_HEIGHT_MASK |
+                       R200_TXFORMAT_CUBIC_MAP_ENABLE |
+                       R200_TXFORMAT_F5_WIDTH_MASK |
+                       R200_TXFORMAT_F5_HEIGHT_MASK);
+   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
+                     (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
+
+   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
+   if (tObj->Target == GL_TEXTURE_3D) {
+      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
+      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
+   }
+   else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+      ASSERT(log2Width == log2height);
+      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
+                         (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
+                         (R200_TXFORMAT_CUBIC_MAP_ENABLE));
+      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
+   }
+
+   t->pp_txsize = (((tObj->Image[firstLevel]->Width - 1) << 0) |
+                   ((tObj->Image[firstLevel]->Height - 1) << 16));
+
+   /* Only need to round to nearest 32 for textures, but the blitter
+    * requires 64-byte aligned pitches, and we may/may not need the
+    * blitter.   NPOT only!
+    */
+   if (baseImage->IsCompressed)
+      t->pp_txpitch = (tObj->Image[firstLevel]->Width + 63) & ~(63);
+   else
+      t->pp_txpitch = ((tObj->Image[firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+   t->pp_txpitch -= 32;
+
+   t->dirty_state = TEX_ALL;
+
+   /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
+}
+
+
+
+/* ================================================================
+ * Texture combine functions
+ */
+
+/* Column indices into r200_color_combine[][] and r200_alpha_combine[][].
+ * R200_MAX_COMBFUNC is the number of columns, not a function itself.
+ */
+#define R200_DISABLE           0
+#define R200_REPLACE           1
+#define R200_MODULATE          2
+#define R200_DECAL             3
+#define R200_BLEND             4
+#define R200_ADD               5
+#define R200_MAX_COMBFUNC      6
+
+/* Color combiner settings, indexed [texture unit][R200_* function above].
+ * Each entry ORs together the A, B and C argument selectors and an
+ * operation (MADD or LERP).  Unit 0 takes the incoming color from
+ * DIFFUSE; units 1 and 2 take it from R0.  The unit's own texture is Rn.
+ *
+ * NOTE(review): the hex values quoted in the per-entry comments are the
+ * original author's and have not been checked against the register
+ * definitions here.
+ */
+static GLuint r200_color_combine[][R200_MAX_COMBFUNC] =
+{
+   /* Unit 0:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (R200_TXC_ARG_A_ZERO  |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_DIFFUSE_COLOR |
+       R200_TXC_OP_MADD),
+
+      /* GL_REPLACE = 0x00802800
+       */
+      (R200_TXC_ARG_A_ZERO |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_R0_COLOR |
+       R200_TXC_OP_MADD),
+
+      /* GL_MODULATE = 0x00800142
+       */
+      (R200_TXC_ARG_A_DIFFUSE_COLOR | /* current starts in DIFFUSE */
+       R200_TXC_ARG_B_R0_COLOR |
+       R200_TXC_ARG_C_ZERO |
+       R200_TXC_OP_MADD),
+
+      /* GL_DECAL = 0x008c2d42
+       */
+      (R200_TXC_ARG_A_DIFFUSE_COLOR |
+       R200_TXC_ARG_B_R0_COLOR |
+       R200_TXC_ARG_C_R0_ALPHA |
+       R200_TXC_OP_LERP),
+
+      /* GL_BLEND = 0x008c2902
+       */
+      (R200_TXC_ARG_A_DIFFUSE_COLOR |
+       R200_TXC_ARG_B_TFACTOR_COLOR |
+       R200_TXC_ARG_C_R0_COLOR |
+       R200_TXC_OP_LERP),
+
+      /* GL_ADD = 0x00812802
+       */
+      (R200_TXC_ARG_A_DIFFUSE_COLOR |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_R0_COLOR |
+       R200_TXC_COMP_ARG_B |
+       R200_TXC_OP_MADD),
+   },
+
+   /* Unit 1:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (R200_TXC_ARG_A_ZERO |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_R0_COLOR |
+       R200_TXC_OP_MADD),
+
+      /* GL_REPLACE = 0x00803000
+       */
+      (R200_TXC_ARG_A_ZERO |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_R1_COLOR |
+       R200_TXC_OP_MADD),
+
+      /* GL_MODULATE = 0x00800182
+       */
+      (R200_TXC_ARG_A_R0_COLOR | /* current in R0 thereafter */
+       R200_TXC_ARG_B_R1_COLOR |
+       R200_TXC_ARG_C_ZERO |
+       R200_TXC_OP_MADD),
+
+      /* GL_DECAL = 0x008c3582
+       */
+      (R200_TXC_ARG_A_R0_COLOR |
+       R200_TXC_ARG_B_R1_COLOR |
+       R200_TXC_ARG_C_R1_ALPHA |
+       R200_TXC_OP_LERP),
+
+      /* GL_BLEND = 0x008c3102
+       */
+      (R200_TXC_ARG_A_R0_COLOR |
+       R200_TXC_ARG_B_TFACTOR_COLOR |
+       R200_TXC_ARG_C_R1_COLOR |
+       R200_TXC_OP_LERP),
+
+      /* GL_ADD = 0x00813002
+       */
+      (R200_TXC_ARG_A_R0_COLOR |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_R1_COLOR |
+       R200_TXC_COMP_ARG_B |
+       R200_TXC_OP_MADD),
+   },
+
+   /* Unit 2:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (R200_TXC_ARG_A_ZERO |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_R0_COLOR |
+       R200_TXC_OP_MADD),
+
+      /* GL_REPLACE = 0x00803800
+       */
+      (R200_TXC_ARG_A_ZERO |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_R2_COLOR |
+       R200_TXC_OP_MADD),
+
+      /* GL_MODULATE = 0x008001c2
+       */
+      (R200_TXC_ARG_A_R0_COLOR |
+       R200_TXC_ARG_B_R2_COLOR |
+       R200_TXC_ARG_C_ZERO |
+       R200_TXC_OP_MADD),
+
+      /* GL_DECAL = 0x008c3dc2
+       */
+      (R200_TXC_ARG_A_R0_COLOR |
+       R200_TXC_ARG_B_R2_COLOR |
+       R200_TXC_ARG_C_R2_ALPHA |
+       R200_TXC_OP_LERP),
+
+      /* GL_BLEND = 0x008c3902
+       */
+      (R200_TXC_ARG_A_R0_COLOR |
+       R200_TXC_ARG_B_TFACTOR_COLOR |
+       R200_TXC_ARG_C_R2_COLOR |
+       R200_TXC_OP_LERP),
+
+      /* GL_ADD = 0x00813802
+       */
+      (R200_TXC_ARG_A_R0_COLOR |
+       R200_TXC_ARG_B_ZERO |
+       R200_TXC_ARG_C_R2_COLOR |
+       R200_TXC_COMP_ARG_B |
+       R200_TXC_OP_MADD),
+   }
+};
+
+/* Alpha combiner settings, indexed [texture unit][combine function] with
+ * the columns in the order disable, replace, modulate, decal, blend, add.
+ * Each entry ORs together the A, B and C argument selectors and an
+ * operation (MADD or LERP).  Unit 0 takes the incoming alpha from
+ * DIFFUSE; units 1 and 2 take it from R0.  The GL_DECAL entries are
+ * identical to the "disable" entry of the same unit, i.e. they pass the
+ * incoming alpha through.
+ *
+ * NOTE(review): the hex values quoted in the per-entry comments look
+ * stale.  Within each unit, GL_MODULATE, GL_BLEND and GL_ADD carry the
+ * same number although their definitions differ, and unit 2's GL_ADD
+ * value does not follow the pattern of units 0 and 1.  Trust the
+ * symbolic definitions, not the numbers.
+ */
+static GLuint r200_alpha_combine[][R200_MAX_COMBFUNC] =
+{
+   /* Unit 0:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_DIFFUSE_ALPHA |
+       R200_TXA_OP_MADD),
+
+
+      /* GL_REPLACE = 0x00800500
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R0_ALPHA |
+       R200_TXA_OP_MADD),
+
+      /* GL_MODULATE = 0x00800051
+       */
+      (R200_TXA_ARG_A_DIFFUSE_ALPHA |
+       R200_TXA_ARG_B_R0_ALPHA |
+       R200_TXA_ARG_C_ZERO |
+       R200_TXA_OP_MADD),
+
+      /* GL_DECAL = 0x00800100
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_DIFFUSE_ALPHA |
+       R200_TXA_OP_MADD),
+
+      /* GL_BLEND = 0x00800051
+       */
+      (R200_TXA_ARG_A_DIFFUSE_ALPHA |
+       R200_TXA_ARG_B_TFACTOR_ALPHA |
+       R200_TXA_ARG_C_R0_ALPHA |
+       R200_TXA_OP_LERP),
+
+      /* GL_ADD = 0x00800051
+       */
+      (R200_TXA_ARG_A_DIFFUSE_ALPHA |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R0_ALPHA |
+       R200_TXA_COMP_ARG_B |
+       R200_TXA_OP_MADD),
+   },
+
+   /* Unit 1:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R0_ALPHA |
+       R200_TXA_OP_MADD),
+
+      /* GL_REPLACE = 0x00800600
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R1_ALPHA |
+       R200_TXA_OP_MADD),
+
+      /* GL_MODULATE = 0x00800061
+       */
+      (R200_TXA_ARG_A_R0_ALPHA |
+       R200_TXA_ARG_B_R1_ALPHA |
+       R200_TXA_ARG_C_ZERO |
+       R200_TXA_OP_MADD),
+
+      /* GL_DECAL = 0x00800100
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R0_ALPHA |
+       R200_TXA_OP_MADD),
+
+      /* GL_BLEND = 0x00800061
+       */
+      (R200_TXA_ARG_A_R0_ALPHA |
+       R200_TXA_ARG_B_TFACTOR_ALPHA |
+       R200_TXA_ARG_C_R1_ALPHA |
+       R200_TXA_OP_LERP),
+
+      /* GL_ADD = 0x00800061
+       */
+      (R200_TXA_ARG_A_R0_ALPHA |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R1_ALPHA |
+       R200_TXA_COMP_ARG_B |
+       R200_TXA_OP_MADD),
+   },
+
+   /* Unit 2:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R0_ALPHA |
+       R200_TXA_OP_MADD),
+
+      /* GL_REPLACE = 0x00800700
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R2_ALPHA |
+       R200_TXA_OP_MADD),
+
+      /* GL_MODULATE = 0x00800071
+       */
+      (R200_TXA_ARG_A_R0_ALPHA |
+       R200_TXA_ARG_B_R2_ALPHA |
+       R200_TXA_ARG_C_ZERO |
+       R200_TXA_OP_MADD),
+
+      /* GL_DECAL = 0x00800100
+       */
+      (R200_TXA_ARG_A_ZERO |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R0_ALPHA |
+       R200_TXA_OP_MADD),
+
+      /* GL_BLEND = 0x00800071
+       */
+      (R200_TXA_ARG_A_R0_ALPHA |
+       R200_TXA_ARG_B_TFACTOR_ALPHA |
+       R200_TXA_ARG_C_R2_ALPHA |
+       R200_TXA_OP_LERP),
+
+      /* GL_ADD = 0x00800021
+       */
+      (R200_TXA_ARG_A_R0_ALPHA |
+       R200_TXA_ARG_B_ZERO |
+       R200_TXA_ARG_C_R2_ALPHA |
+       R200_TXA_COMP_ARG_B |
+       R200_TXA_OP_MADD),
+   }
+};
+
+
+/* GL_ARB_texture_env_combine support
+ */
+
+/* The color tables have combine functions for GL_SRC_COLOR,
+ * GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
+ *
+ * r200_register_color holds slot-A selectors for the texture registers
+ * R0..R2.  Each row lists one selector per register; the four rows are
+ * color, color with the complement bit, alpha, and alpha with the
+ * complement bit (presumably in the operand order listed above - confirm
+ * against the lookup code).
+ */
+static GLuint r200_register_color[][R200_MAX_TEXTURE_UNITS] =
+{
+   {
+      R200_TXC_ARG_A_R0_COLOR,
+      R200_TXC_ARG_A_R1_COLOR,
+      R200_TXC_ARG_A_R2_COLOR
+   },
+   {
+      R200_TXC_ARG_A_R0_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R1_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R2_COLOR | R200_TXC_COMP_ARG_A
+   },
+   {
+      R200_TXC_ARG_A_R0_ALPHA,
+      R200_TXC_ARG_A_R1_ALPHA,
+      R200_TXC_ARG_A_R2_ALPHA
+   },
+   {
+      R200_TXC_ARG_A_R0_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R1_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R2_ALPHA | R200_TXC_COMP_ARG_A
+   },
+};
+
+/* Slot-A selectors for the TFACTOR source in the color combiner:
+ * color, complemented color, alpha, complemented alpha.
+ */
+static GLuint r200_tfactor_color[] =
+{
+   R200_TXC_ARG_A_TFACTOR_COLOR,
+   R200_TXC_ARG_A_TFACTOR_COLOR | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_TFACTOR_ALPHA,
+   R200_TXC_ARG_A_TFACTOR_ALPHA | R200_TXC_COMP_ARG_A
+};
+
+/* Slot-A selectors for the DIFFUSE source in the color combiner:
+ * color, complemented color, alpha, complemented alpha.
+ */
+static GLuint r200_primary_color[] =
+{
+   R200_TXC_ARG_A_DIFFUSE_COLOR,
+   R200_TXC_ARG_A_DIFFUSE_COLOR | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_DIFFUSE_ALPHA,
+   R200_TXC_ARG_A_DIFFUSE_ALPHA | R200_TXC_COMP_ARG_A
+};
+
+/* GL_ZERO table - indices 0-3
+ * GL_ONE  table - indices 1-4
+ *
+ * One five-entry array serves both constants: entries alternate between
+ * ZERO and complemented ZERO, so reading it one position further along
+ * swaps every entry for its complement.
+ */
+static GLuint r200_zero_color[] =
+{
+   R200_TXC_ARG_A_ZERO,
+   R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_ZERO,
+   R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_ZERO
+};
+
+/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
+ *
+ * r200_register_alpha holds slot-A selectors for the alpha of texture
+ * registers R0..R2: first row plain, second row with the complement bit.
+ */
+static GLuint r200_register_alpha[][R200_MAX_TEXTURE_UNITS] =
+{
+   {
+      R200_TXA_ARG_A_R0_ALPHA,
+      R200_TXA_ARG_A_R1_ALPHA,
+      R200_TXA_ARG_A_R2_ALPHA
+   },
+   {
+      R200_TXA_ARG_A_R0_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R1_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R2_ALPHA | R200_TXA_COMP_ARG_A
+   },
+};
+
+/* Slot-A selectors for the TFACTOR source in the alpha combiner:
+ * alpha, complemented alpha.
+ */
+static GLuint r200_tfactor_alpha[] =
+{
+   R200_TXA_ARG_A_TFACTOR_ALPHA,
+   R200_TXA_ARG_A_TFACTOR_ALPHA | R200_TXA_COMP_ARG_A
+};
+
+/* Slot-A selectors for the DIFFUSE source in the alpha combiner:
+ * alpha, complemented alpha.
+ */
+static GLuint r200_primary_alpha[] =
+{
+   R200_TXA_ARG_A_DIFFUSE_ALPHA,
+   R200_TXA_ARG_A_DIFFUSE_ALPHA | R200_TXA_COMP_ARG_A
+};
+
+/* GL_ZERO table - indices 0-1
+ * GL_ONE  table - indices 1-2
+ *
+ * One three-entry array serves both constants: entries alternate between
+ * ZERO and complemented ZERO, so reading it one position further along
+ * swaps every entry for its complement.
+ */
+static GLuint r200_zero_alpha[] =
+{
+   R200_TXA_ARG_A_ZERO,
+   R200_TXA_ARG_A_ZERO | R200_TXA_COMP_ARG_A,
+   R200_TXA_ARG_A_ZERO,
+};
+
+
+/* Extract the arg from slot A, shift it into the correct argument slot
+ * and set the corresponding complement bit.
+ *
+ * R200_COLOR_ARG( n, arg ): n indexes color_arg[]; arg is the slot letter
+ * pasted into R200_TXC_ARG_<arg>_SHIFT and R200_TXC_COMP_ARG_<arg>_SHIFT.
+ * The macro ORs into a variable named color_combine and reads an array
+ * named color_arg, both of which must exist in the expanding scope.
+ *
+ * NOTE(review): the complement bit is obtained by shifting the whole
+ * value right by R200_TXC_COMP_ARG_A_SHIFT, which presumably assumes no
+ * bits above the slot-A complement bit are set in color_arg[n] - confirm.
+ */
+#define R200_COLOR_ARG( n, arg )                       \
+do {                                                   \
+   color_combine |=                                    \
+      ((color_arg[n] & R200_TXC_ARG_A_MASK)            \
+       << R200_TXC_ARG_##arg##_SHIFT);                 \
+   color_combine |=                                    \
+      ((color_arg[n] >> R200_TXC_COMP_ARG_A_SHIFT)     \
+       << R200_TXC_COMP_ARG_##arg##_SHIFT);            \
+} while (0)
+
+/* R200_ALPHA_ARG( n, arg ): move the slot-A selector held in alpha_arg[n]
+ * into argument slot <arg> of the alpha combiner word, together with its
+ * complement bit.  The macro ORs into a variable named alpha_combine and
+ * reads an array named alpha_arg, both of which must exist in the
+ * expanding scope.
+ *
+ * NOTE(review): the complement bit is obtained by shifting the whole
+ * value right by R200_TXA_COMP_ARG_A_SHIFT, which presumably assumes no
+ * bits above the slot-A complement bit are set in alpha_arg[n] - confirm.
+ */
+#define R200_ALPHA_ARG( n, arg )                       \
+do {                                                   \
+   alpha_combine |=                                    \
+      ((alpha_arg[n] & R200_TXA_ARG_A_MASK)            \
+       << R200_TXA_ARG_##arg##_SHIFT);                 \
+   alpha_combine |=                                    \
+      ((alpha_arg[n] >> R200_TXA_COMP_ARG_A_SHIFT)     \
+       << R200_TXA_COMP_ARG_##arg##_SHIFT);            \
+} while (0)
+
+
+/* ================================================================
+ * Texture unit state management
+ */
+
+/* Translate the GL texture environment of one texture unit into the
+ * four combiner words of the unit's `pix' state atom (TXCBLEND,
+ * TXABLEND, TXCBLEND2, TXABLEND2) and flag the atom dirty if any of
+ * them changed.
+ *
+ * ctx   GL context.
+ * unit  Index of the texture unit to update.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if the env mode / texture format
+ * / combine setup cannot be expressed and a fallback is required.  On
+ * GL_FALSE the hardware state words are left untouched.
+ */
+static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint color_combine, alpha_combine;
+   /* Start from the current TXxBLEND2 words with the post-scale fields
+    * and the DOT3 alpha bit cleared.  Only the GL_COMBINE path sets them
+    * again, so a scale or DOT_ALPHA bit left behind by a previous
+    * GL_COMBINE setup can no longer leak into the fixed env modes or
+    * into a later, different combine mode.  (A zero scale field is the
+    * same value the combine path writes for a scale shift of 0.)
+    */
+   GLuint color_scale = rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND2] &
+      ~R200_TXC_SCALE_MASK;
+   GLuint alpha_scale = rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND2] &
+      ~(R200_TXA_DOT_ALPHA | R200_TXA_SCALE_MASK);
+
+   /* texUnit->_Current can be NULL if and only if the texture unit is
+    * not actually enabled.
+    */
+   assert( (texUnit->_ReallyEnabled == 0)
+          || (texUnit->_Current != NULL) );
+
+   if ( R200_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, ctx, unit );
+   }
+
+   /* Set the texture environment state.  Isn't this nice and clean?
+    * The chip will automagically set the texture alpha to 0xff when
+    * the texture format does not include an alpha component.  This
+    * reduces the amount of special-casing we have to do, alpha-only
+    * textures being a notable exception.
+    */
+   if ( !texUnit->_ReallyEnabled ) {
+      /* Don't cache these results.
+       */
+      rmesa->state.texture.unit[unit].format = 0;
+      rmesa->state.texture.unit[unit].envMode = 0;
+      color_combine = r200_color_combine[unit][R200_DISABLE];
+      alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+   }
+   else {
+      const struct gl_texture_object *tObj = texUnit->_Current;
+      const GLenum format = tObj->Image[tObj->BaseLevel]->Format;
+      GLuint color_arg[3], alpha_arg[3];
+      GLuint i, numColorArgs = 0, numAlphaArgs = 0;
+      GLuint RGBshift = texUnit->CombineScaleShiftRGB;
+      GLuint Ashift = texUnit->CombineScaleShiftA;
+
+      switch ( texUnit->EnvMode ) {
+      case GL_REPLACE:
+        switch ( format ) {
+        case GL_RGBA:
+        case GL_LUMINANCE_ALPHA:
+        case GL_INTENSITY:
+           color_combine = r200_color_combine[unit][R200_REPLACE];
+           alpha_combine = r200_alpha_combine[unit][R200_REPLACE];
+           break;
+        case GL_ALPHA:
+           color_combine = r200_color_combine[unit][R200_DISABLE];
+           alpha_combine = r200_alpha_combine[unit][R200_REPLACE];
+           break;
+        case GL_LUMINANCE:
+        case GL_RGB:
+        case GL_YCBCR_MESA:
+           color_combine = r200_color_combine[unit][R200_REPLACE];
+           alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+           break;
+        case GL_COLOR_INDEX:
+        default:
+           return GL_FALSE;
+        }
+        break;
+
+      case GL_MODULATE:
+        switch ( format ) {
+        case GL_RGBA:
+        case GL_LUMINANCE_ALPHA:
+        case GL_INTENSITY:
+           color_combine = r200_color_combine[unit][R200_MODULATE];
+           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+           break;
+        case GL_ALPHA:
+           color_combine = r200_color_combine[unit][R200_DISABLE];
+           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+           break;
+        case GL_RGB:
+        case GL_LUMINANCE:
+        case GL_YCBCR_MESA:
+           color_combine = r200_color_combine[unit][R200_MODULATE];
+           alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+           break;
+        case GL_COLOR_INDEX:
+        default:
+           return GL_FALSE;
+        }
+        break;
+
+      case GL_DECAL:
+        switch ( format ) {
+        case GL_RGBA:
+        case GL_RGB:
+        case GL_YCBCR_MESA:
+           color_combine = r200_color_combine[unit][R200_DECAL];
+           alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+           break;
+        case GL_ALPHA:
+        case GL_LUMINANCE:
+        case GL_LUMINANCE_ALPHA:
+        case GL_INTENSITY:
+           color_combine = r200_color_combine[unit][R200_DISABLE];
+           alpha_combine = r200_alpha_combine[unit][R200_DISABLE];
+           break;
+        case GL_COLOR_INDEX:
+        default:
+           return GL_FALSE;
+        }
+        break;
+
+      case GL_BLEND:
+        switch ( format ) {
+        case GL_RGBA:
+        case GL_RGB:
+        case GL_LUMINANCE:
+        case GL_LUMINANCE_ALPHA:
+        case GL_YCBCR_MESA:
+           color_combine = r200_color_combine[unit][R200_BLEND];
+           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+           break;
+        case GL_ALPHA:
+           color_combine = r200_color_combine[unit][R200_DISABLE];
+           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+           break;
+        case GL_INTENSITY:
+           color_combine = r200_color_combine[unit][R200_BLEND];
+           alpha_combine = r200_alpha_combine[unit][R200_BLEND];
+           break;
+        case GL_COLOR_INDEX:
+        default:
+           return GL_FALSE;
+        }
+        break;
+
+      case GL_ADD:
+        switch ( format ) {
+        case GL_RGBA:
+        case GL_RGB:
+        case GL_LUMINANCE:
+        case GL_LUMINANCE_ALPHA:
+        case GL_YCBCR_MESA:
+           color_combine = r200_color_combine[unit][R200_ADD];
+           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+           break;
+        case GL_ALPHA:
+           color_combine = r200_color_combine[unit][R200_DISABLE];
+           alpha_combine = r200_alpha_combine[unit][R200_MODULATE];
+           break;
+        case GL_INTENSITY:
+           color_combine = r200_color_combine[unit][R200_ADD];
+           alpha_combine = r200_alpha_combine[unit][R200_ADD];
+           break;
+        case GL_COLOR_INDEX:
+        default:
+           return GL_FALSE;
+        }
+        break;
+
+      case GL_COMBINE:
+        /* Don't cache these results.
+         */
+        rmesa->state.texture.unit[unit].format = 0;
+        rmesa->state.texture.unit[unit].envMode = 0;
+
+        /* Step 0:
+         * Calculate how many arguments we need to process.
+         */
+        switch ( texUnit->CombineModeRGB ) {
+        case GL_REPLACE:
+           numColorArgs = 1;
+           break;
+        case GL_MODULATE:
+        case GL_ADD:
+        case GL_ADD_SIGNED:
+        case GL_SUBTRACT:
+        case GL_DOT3_RGB:
+        case GL_DOT3_RGBA:
+        case GL_DOT3_RGB_EXT:
+        case GL_DOT3_RGBA_EXT:
+           numColorArgs = 2;
+           break;
+        case GL_INTERPOLATE:
+        case GL_MODULATE_ADD_ATI:
+        case GL_MODULATE_SIGNED_ADD_ATI:
+        case GL_MODULATE_SUBTRACT_ATI:
+           numColorArgs = 3;
+           break;
+        default:
+           return GL_FALSE;
+        }
+
+        switch ( texUnit->CombineModeA ) {
+        case GL_REPLACE:
+           numAlphaArgs = 1;
+           break;
+        case GL_MODULATE:
+        case GL_ADD:
+        case GL_ADD_SIGNED:
+        case GL_SUBTRACT:
+           numAlphaArgs = 2;
+           break;
+        case GL_INTERPOLATE:
+        case GL_MODULATE_ADD_ATI:
+        case GL_MODULATE_SIGNED_ADD_ATI:
+        case GL_MODULATE_SUBTRACT_ATI:
+           numAlphaArgs = 3;
+           break;
+        default:
+           return GL_FALSE;
+        }
+
+        /* Step 1:
+         * Extract the color and alpha combine function arguments.
+         */
+        for ( i = 0 ; i < numColorArgs ; i++ ) {
+           /* op is unsigned: an operand below GL_SRC_COLOR wraps to a
+            * huge value and is caught by the upper-bound assert, so no
+            * separate "op >= 0" check is needed.
+            */
+           const GLuint op = texUnit->CombineOperandRGB[i] - GL_SRC_COLOR;
+           assert(op <= 3);
+           switch ( texUnit->CombineSourceRGB[i] ) {
+           case GL_TEXTURE:
+              color_arg[i] = r200_register_color[op][unit];
+              break;
+           case GL_CONSTANT:
+              color_arg[i] = r200_tfactor_color[op];
+              break;
+           case GL_PRIMARY_COLOR:
+              color_arg[i] = r200_primary_color[op];
+              break;
+           case GL_PREVIOUS:
+              /* Unit 0 has no previous stage; use the primary color. */
+              if (unit == 0)
+                 color_arg[i] = r200_primary_color[op];
+              else
+                 color_arg[i] = r200_register_color[op][0];
+              break;
+           case GL_ZERO:
+              color_arg[i] = r200_zero_color[op];
+              break;
+           case GL_ONE:
+              /* Uses the neighbouring table entry of the zero table. */
+              color_arg[i] = r200_zero_color[op+1];
+              break;
+           default:
+              return GL_FALSE;
+           }
+        }
+
+        for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+           /* Same unsigned wrap-around reasoning as for the color op. */
+           const GLuint op = texUnit->CombineOperandA[i] - GL_SRC_ALPHA;
+           assert(op <= 1);
+           switch ( texUnit->CombineSourceA[i] ) {
+           case GL_TEXTURE:
+              alpha_arg[i] = r200_register_alpha[op][unit];
+              break;
+           case GL_CONSTANT:
+              alpha_arg[i] = r200_tfactor_alpha[op];
+              break;
+           case GL_PRIMARY_COLOR:
+              alpha_arg[i] = r200_primary_alpha[op];
+              break;
+           case GL_PREVIOUS:
+              if (unit == 0)
+                 alpha_arg[i] = r200_primary_alpha[op];
+              else
+                 alpha_arg[i] = r200_register_alpha[op][0];
+              break;
+           case GL_ZERO:
+              alpha_arg[i] = r200_zero_alpha[op];
+              break;
+           case GL_ONE:
+              alpha_arg[i] = r200_zero_alpha[op+1];
+              break;
+           default:
+              return GL_FALSE;
+           }
+        }
+
+        /* Step 2:
+         * Build up the color and alpha combine functions.
+         */
+        switch ( texUnit->CombineModeRGB ) {
+        case GL_REPLACE:
+           color_combine = (R200_TXC_ARG_A_ZERO |
+                            R200_TXC_ARG_B_ZERO |
+                            R200_TXC_OP_MADD);
+           R200_COLOR_ARG( 0, C );
+           break;
+        case GL_MODULATE:
+           color_combine = (R200_TXC_ARG_C_ZERO |
+                            R200_TXC_OP_MADD);
+           R200_COLOR_ARG( 0, A );
+           R200_COLOR_ARG( 1, B );
+           break;
+        case GL_ADD:
+           color_combine = (R200_TXC_ARG_B_ZERO |
+                            R200_TXC_COMP_ARG_B |
+                            R200_TXC_OP_MADD);
+           R200_COLOR_ARG( 0, A );
+           R200_COLOR_ARG( 1, C );
+           break;
+        case GL_ADD_SIGNED:
+           color_combine = (R200_TXC_ARG_B_ZERO |
+                            R200_TXC_COMP_ARG_B |
+                            R200_TXC_BIAS_ARG_C |      /* new */
+                            R200_TXC_OP_MADD); /* was ADDSIGNED */
+           R200_COLOR_ARG( 0, A );
+           R200_COLOR_ARG( 1, C );
+           break;
+        case GL_SUBTRACT:
+           color_combine = (R200_TXC_ARG_B_ZERO |
+                            R200_TXC_COMP_ARG_B |
+                            R200_TXC_NEG_ARG_C |
+                            R200_TXC_OP_MADD);
+           R200_COLOR_ARG( 0, A );
+           R200_COLOR_ARG( 1, C );
+           break;
+        case GL_INTERPOLATE:
+           color_combine = (R200_TXC_OP_LERP);
+           R200_COLOR_ARG( 0, B );
+           R200_COLOR_ARG( 1, A );
+           R200_COLOR_ARG( 2, C );
+           break;
+
+        case GL_DOT3_RGB_EXT:
+        case GL_DOT3_RGBA_EXT:
+           /* The EXT version of the DOT3 extension does not support the
+            * scale factor, but the ARB version (and the version in OpenGL
+            * 1.3) does.
+            */
+           RGBshift = 0;
+           Ashift = 0;
+           /* FALLTHROUGH */
+
+        case GL_DOT3_RGB:
+        case GL_DOT3_RGBA:
+           /* DOT3 works differently on R200 than on R100.  On R100, just
+            * setting the DOT3 mode did everything for you.  On R200, the
+            * driver has to enable the biasing (the -0.5 in the combine
+            * equation), and it has to add the 4x scale factor.  The
+            * hardware only supports up to 8x in the post filter, so the
+            * 2x part of it happens on the inputs going into the combiner.
+            */
+
+           RGBshift++;
+
+           color_combine = (R200_TXC_ARG_C_ZERO |
+                            R200_TXC_OP_DOT3 |
+                            R200_TXC_BIAS_ARG_A |
+                            R200_TXC_BIAS_ARG_B |
+                            R200_TXC_SCALE_ARG_A |
+                            R200_TXC_SCALE_ARG_B);
+           R200_COLOR_ARG( 0, A );
+           R200_COLOR_ARG( 1, B );
+           break;
+
+        case GL_MODULATE_ADD_ATI:
+           color_combine = (R200_TXC_OP_MADD);
+           R200_COLOR_ARG( 0, A );
+           R200_COLOR_ARG( 1, C );
+           R200_COLOR_ARG( 2, B );
+           break;
+        case GL_MODULATE_SIGNED_ADD_ATI:
+           color_combine = (R200_TXC_BIAS_ARG_C |      /* new */
+                            R200_TXC_OP_MADD); /* was ADDSIGNED */
+           R200_COLOR_ARG( 0, A );
+           R200_COLOR_ARG( 1, C );
+           R200_COLOR_ARG( 2, B );
+           break;
+        case GL_MODULATE_SUBTRACT_ATI:
+           color_combine = (R200_TXC_NEG_ARG_C |
+                            R200_TXC_OP_MADD);
+           R200_COLOR_ARG( 0, A );
+           R200_COLOR_ARG( 1, C );
+           R200_COLOR_ARG( 2, B );
+           break;
+        default:
+           return GL_FALSE;
+        }
+
+        switch ( texUnit->CombineModeA ) {
+        case GL_REPLACE:
+           alpha_combine = (R200_TXA_ARG_A_ZERO |
+                            R200_TXA_ARG_B_ZERO |
+                            R200_TXA_OP_MADD);
+           R200_ALPHA_ARG( 0, C );
+           break;
+        case GL_MODULATE:
+           alpha_combine = (R200_TXA_ARG_C_ZERO |
+                            R200_TXA_OP_MADD);
+           R200_ALPHA_ARG( 0, A );
+           R200_ALPHA_ARG( 1, B );
+           break;
+        case GL_ADD:
+           alpha_combine = (R200_TXA_ARG_B_ZERO |
+                            R200_TXA_COMP_ARG_B |
+                            R200_TXA_OP_MADD);
+           R200_ALPHA_ARG( 0, A );
+           R200_ALPHA_ARG( 1, C );
+           break;
+        case GL_ADD_SIGNED:
+           alpha_combine = (R200_TXA_ARG_B_ZERO |
+                            R200_TXA_COMP_ARG_B |
+                            R200_TXA_BIAS_ARG_C |      /* new */
+                            R200_TXA_OP_MADD); /* was ADDSIGNED */
+           R200_ALPHA_ARG( 0, A );
+           R200_ALPHA_ARG( 1, C );
+           break;
+        case GL_SUBTRACT:
+           alpha_combine = (R200_TXA_ARG_B_ZERO |
+                            R200_TXA_COMP_ARG_B |
+                            R200_TXA_NEG_ARG_C |
+                            R200_TXA_OP_MADD);
+           R200_ALPHA_ARG( 0, A );
+           R200_ALPHA_ARG( 1, C );
+           break;
+        case GL_INTERPOLATE:
+           alpha_combine = (R200_TXA_OP_LERP);
+           R200_ALPHA_ARG( 0, B );
+           R200_ALPHA_ARG( 1, A );
+           R200_ALPHA_ARG( 2, C );
+           break;
+
+        case GL_MODULATE_ADD_ATI:
+           alpha_combine = (R200_TXA_OP_MADD);
+           R200_ALPHA_ARG( 0, A );
+           R200_ALPHA_ARG( 1, C );
+           R200_ALPHA_ARG( 2, B );
+           break;
+        case GL_MODULATE_SIGNED_ADD_ATI:
+           alpha_combine = (R200_TXA_BIAS_ARG_C |      /* new */
+                            R200_TXA_OP_MADD); /* was ADDSIGNED */
+           R200_ALPHA_ARG( 0, A );
+           R200_ALPHA_ARG( 1, C );
+           R200_ALPHA_ARG( 2, B );
+           break;
+        case GL_MODULATE_SUBTRACT_ATI:
+           alpha_combine = (R200_TXA_NEG_ARG_C |
+                            R200_TXA_OP_MADD);
+           R200_ALPHA_ARG( 0, A );
+           R200_ALPHA_ARG( 1, C );
+           R200_ALPHA_ARG( 2, B );
+           break;
+        default:
+           return GL_FALSE;
+        }
+
+        /* Only the RGBA flavours of DOT3 write the dot product to the
+         * alpha channel as well; the RGB flavours keep the result of the
+         * alpha combiner (and its own scale) set up above.  For the RGBA
+         * flavours the alpha output is the dot product, so it must use
+         * the same post-scale as the color output.
+         * NOTE(review): relies on R200_TXA_DOT_ALPHA meaning "replace
+         * alpha with the dot product" -- confirm against the register
+         * documentation.
+         */
+        if ( (texUnit->CombineModeRGB == GL_DOT3_RGBA_EXT)
+             || (texUnit->CombineModeRGB == GL_DOT3_RGBA) ) {
+           alpha_scale |= R200_TXA_DOT_ALPHA;
+           Ashift = RGBshift;
+        }
+
+        /* Step 3:
+         * Apply the scale factor.  The scale fields were already
+         * cleared when color_scale / alpha_scale were initialised.
+         */
+        color_scale |= (RGBshift << R200_TXC_SCALE_SHIFT);
+        alpha_scale |= (Ashift   << R200_TXA_SCALE_SHIFT);
+
+        /* All done!
+         */
+        break;
+
+      default:
+        return GL_FALSE;
+      }
+   }
+
+   /* Only touch (and dirty) the state atom when something changed. */
+   if ( rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND] != color_combine ||
+       rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND] != alpha_combine ||
+       rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND2] != color_scale ||
+       rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND2] != alpha_scale) {
+      R200_STATECHANGE( rmesa, pix[unit] );
+      rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND] = color_combine;
+      rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND] = alpha_combine;
+      rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND2] = color_scale;
+      rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND2] = alpha_scale;
+   }
+
+   return GL_TRUE;
+}
+
+/* Bits of the PP_TXFILTER word that import_tex_obj_state() takes from the
+ * texture object; all other bits of the hardware word are preserved.
+ */
+#define TEXOBJ_TXFILTER_MASK (R200_MAX_MIP_LEVEL_MASK |                \
+                             R200_MIN_FILTER_MASK |            \
+                             R200_MAG_FILTER_MASK |            \
+                             R200_MAX_ANISO_MASK |             \
+                             R200_YUV_TO_RGB |                 \
+                             R200_YUV_TEMPERATURE_MASK |       \
+                             R200_CLAMP_S_MASK |               \
+                             R200_CLAMP_T_MASK |               \
+                             R200_BORDER_MODE_D3D )
+
+/* Bits of the PP_TXFORMAT word that import_tex_obj_state() takes from the
+ * texture object.
+ */
+#define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK |       \
+                             R200_TXFORMAT_HEIGHT_MASK |       \
+                             R200_TXFORMAT_FORMAT_MASK |       \
+                              R200_TXFORMAT_F5_WIDTH_MASK |    \
+                              R200_TXFORMAT_F5_HEIGHT_MASK |   \
+                             R200_TXFORMAT_ALPHA_IN_MAP |      \
+                             R200_TXFORMAT_CUBIC_MAP_ENABLE |  \
+                              R200_TXFORMAT_NON_POWER2)
+
+/* Bits of the PP_TXFORMAT_X word that import_tex_obj_state() takes from
+ * the texture object.
+ */
+#define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK |         \
+                                R200_TEXCOORD_MASK |           \
+                                R200_VOLUME_FILTER_MASK)
+
+
+/* Copy the per-texture-object register values of `texobj' into the
+ * hardware state of texture unit `unit' and clear the object's dirty
+ * bit for that unit.
+ *
+ * For TXFILTER, TXFORMAT and TXFORMAT_X only the bits covered by the
+ * corresponding TEXOBJ_*_MASK are replaced; the remaining bits of the
+ * hardware words are kept.  For cube maps the per-face offsets of faces
+ * 1..5 are derived from the base offset, assuming all six faces have
+ * the same size.
+ */
+static void import_tex_obj_state( r200ContextPtr rmesa,
+                                 int unit,
+                                 r200TexObjPtr texobj )
+{
+   GLuint *cmd = R200_DB_STATE( tex[unit] );
+
+   cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
+   cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
+   cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+   cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
+   cmd[TEX_PP_TXFORMAT_X] &= ~TEXOBJ_TXFORMAT_X_MASK;
+   cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK;
+   cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
+   cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
+   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
+   cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
+
+   if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
+      GLuint *cube_cmd = R200_DB_STATE( cube[unit] );
+      GLuint bytesPerFace = texobj->base.totalSize / 6;
+      /* Check the same field the division above uses; the total size
+       * must split evenly into six faces.
+       */
+      ASSERT(texobj->base.totalSize % 6 == 0);
+      cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace;
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
+      R200_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] );
+   }
+
+   texobj->dirty_state &= ~(1<<unit);
+}
+
+
+
+
+/* Load the S, T and R texgen planes into the unit's texgen matrix and
+ * enable the texture matrix for that unit.
+ *
+ * The planes are stored as rows 0, 1 and 3 of the column-major matrix
+ * (element index = column*4 + row); row 2 is left untouched.  Nothing
+ * is done when all three planes equal {1,1,1,1}.
+ * NOTE(review): comparing every plane against {1,1,1,1} looks odd for
+ * an "identity" test -- behaviour is kept as is, confirm the intent.
+ */
+static void set_texgen_matrix( r200ContextPtr rmesa,
+                              GLuint unit,
+                              const GLfloat *s_plane,
+                              const GLfloat *t_plane,
+                              const GLfloat *r_plane )
+{
+   static const GLfloat all_ones[4] = { 1,1,1,1 };
+   GLuint col;
+
+   if (TEST_EQ_4V( s_plane, all_ones) &&
+       TEST_EQ_4V( t_plane, all_ones) &&
+       TEST_EQ_4V( r_plane, all_ones)) {
+      return;
+   }
+
+   rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit;
+
+   for (col = 0; col < 4; col++) {
+      rmesa->TexGenMatrix[unit].m[col * 4 + 0] = s_plane[col];
+      rmesa->TexGenMatrix[unit].m[col * 4 + 1] = t_plane[col];
+      /* NOTE: r_plane goes in the 4th row, not 3rd! */
+      rmesa->TexGenMatrix[unit].m[col * 4 + 3] = r_plane[col];
+   }
+
+   rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+}
+
+/* Reflection-map texgen needs a fixed matrix to produce the right
+ * coordinates: load it into the unit's texgen matrix and turn the
+ * texture matrix on for that unit.
+ */
+static void
+set_texgen_reflection_matrix( r200ContextPtr rmesa, GLuint unit )
+{
+   static const GLfloat reflect[16] = {
+      -1,  0,  0,  0,
+       0, -1,  0,  0,
+       0,  0,  0, -1,
+       0,  0, -1,  0
+   };
+
+   rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit;
+
+   _math_matrix_loadf( &(rmesa->TexGenMatrix[unit]), reflect );
+   _math_matrix_analyse( &(rmesa->TexGenMatrix[unit]) );
+}
+
+/* Normal-map texgen needs a fixed matrix to produce the right
+ * coordinates: load it into the unit's texgen matrix and turn the
+ * texture matrix on for that unit.
+ */
+static void
+set_texgen_normal_map_matrix( r200ContextPtr rmesa, GLuint unit )
+{
+   static const GLfloat normal_map[16] = {
+      1, 0, 0, 0,
+      0, 1, 0, 0,
+      0, 0, 0, 1,
+      0, 0, 1, 0
+   };
+
+   rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit;
+
+   _math_matrix_loadf( &(rmesa->TexGenMatrix[unit]), normal_map );
+   _math_matrix_analyse( &(rmesa->TexGenMatrix[unit]) );
+}
+
+
+/* Work out the texgen setup for one texture unit and record it in the
+ * driver's TexGen* bookkeeping.  The Q texcoord is ignored for now.
+ *
+ * Supported: texgen off, or S+T / S+T+R enabled with one common mode
+ * out of object linear, eye linear, reflection map, normal map and
+ * sphere map.
+ *
+ * Returns GL_FALSE if fallback required.
+ */
+static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   const GLuint inputshift = R200_TEXGEN_0_INPUT_SHIFT + unit*4;
+   const GLuint oldEnabled = rmesa->TexGenEnabled;
+
+   /* Begin with everything for this unit switched off. */
+   rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
+   rmesa->TexGenEnabled &= ~((R200_TEXGEN_TEXMAT_0_ENABLE<<unit) |
+                             (R200_TEXMAT_0_ENABLE<<unit));
+   rmesa->TexGenInputs &= ~(R200_TEXGEN_INPUT_MASK<<inputshift);
+   rmesa->TexGenNeedNormals[unit] = 0;
+
+   if (0)
+      fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
+
+   /* Texgen disabled: feed the unit's own texcoords through, no
+    * fallback needed.
+    */
+   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT)) == 0) {
+      rmesa->TexGenInputs |=
+        (R200_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+      return GL_TRUE;
+   }
+
+   /* Q generation: would be very easy to do, and would in fact remove
+    * a fallback case elsewhere, but it hasn't been done yet.
+    */
+   if (texUnit->TexGenEnabled & Q_BIT) {
+      /*fprintf(stderr, "fallback Q_BIT\n");*/
+      return GL_FALSE;
+   }
+
+   /* Only S+T or S+T+R with a single shared mode is handled; anything
+    * else (mixed modes, other coordinate subsets) falls back.
+    */
+   if (texUnit->TexGenEnabled == (S_BIT|T_BIT)) {
+      if (texUnit->GenModeS != texUnit->GenModeT) {
+         /* fprintf(stderr, "fallback mixed texgen\n"); */
+         return GL_FALSE;
+      }
+   }
+   else if (texUnit->TexGenEnabled == (S_BIT|T_BIT|R_BIT)) {
+      if (texUnit->GenModeS != texUnit->GenModeT ||
+          texUnit->GenModeT != texUnit->GenModeR) {
+         /* fprintf(stderr, "fallback mixed texgen\n"); */
+         return GL_FALSE;
+      }
+   }
+   else {
+      /* fprintf(stderr, "fallback mixed texgen\n"); */
+      return GL_FALSE;
+   }
+
+   rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit;
+
+   /* All enabled coordinates share GenModeS at this point. */
+   switch (texUnit->GenModeS) {
+   case GL_OBJECT_LINEAR:
+      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_OBJ << inputshift;
+      set_texgen_matrix( rmesa, unit,
+                        texUnit->ObjectPlaneS,
+                        texUnit->ObjectPlaneT,
+                        texUnit->ObjectPlaneR);
+      break;
+
+   case GL_EYE_LINEAR:
+      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_EYE << inputshift;
+      set_texgen_matrix( rmesa, unit,
+                        texUnit->EyePlaneS,
+                        texUnit->EyePlaneT,
+                        texUnit->EyePlaneR);
+      break;
+
+   case GL_REFLECTION_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_EYE_REFLECT<<inputshift;
+      set_texgen_reflection_matrix(rmesa, unit);
+      break;
+
+   case GL_NORMAL_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
+      set_texgen_normal_map_matrix(rmesa, unit);
+      break;
+
+   case GL_SPHERE_MAP:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenInputs |= R200_TEXGEN_INPUT_SPHERE<<inputshift;
+      break;
+
+   default:
+      /* Mode the hardware path does not handle: fallback. */
+      /*  fprintf(stderr, "fallback unsupported texgen\n"); */
+      return GL_FALSE;
+   }
+
+   rmesa->TexGenCompSel |= R200_OUTPUT_TEX_0 << unit;
+
+   /* A change in the enable bits means the texture matrix state must
+    * be re-derived.
+    */
+   if (rmesa->TexGenEnabled != oldEnabled) {
+      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Turn texture unit `unit' off in the hardware state.
+ *
+ * Does nothing unless the unit's enable bit is currently set in
+ * CTX_PP_CNTL.  Otherwise it unbinds the driver texture object from the
+ * unit, clears the unit's enable/blend bits and its output vertex format
+ * bits, drops any pending texgen TCL fallback for the unit and resets
+ * the unit's texgen bookkeeping.
+ */
+static void disable_tex( GLcontext *ctx, int unit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) {
+      /* Texture unit disabled */
+      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+        /* The old texture is no longer bound to this texture unit.
+         * Mark it as such.
+         */
+
+        rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
+        rmesa->state.texture.unit[unit].texobj = NULL;
+      }
+
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((R200_TEX_0_ENABLE |
+                                          R200_TEX_BLEND_0_ENABLE) << unit);
+      /* Blend stage 0 is always kept enabled, even with unit 0 off. */
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_BLEND_0_ENABLE;
+
+      /* The word modified below lives in the `vtx' atom, so that is the
+       * atom that has to be flagged (update_tex_common does the same
+       * when it sets these bits).
+       */
+      R200_STATECHANGE( rmesa, vtx );
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+
+      if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
+        TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+      }
+
+      /* Actually want to keep all units less than max active texture
+       * enabled, right?  Fix this for >2 texunits.
+       */
+      /* FIXME: What should happen here if r200UpdateTextureEnv fails? */
+      if (unit == 0)
+        r200UpdateTextureEnv( ctx, unit );
+
+
+      {
+        GLuint inputshift = R200_TEXGEN_0_INPUT_SHIFT + unit*4;
+        GLuint tmp = rmesa->TexGenEnabled;
+
+        rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
+        rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
+        /* NOTE(review): this applies the *input* mask to TexGenEnabled;
+         * TexGenInputs is cleared separately below.  Looks like a
+         * copy/paste slip, but kept because the bit layout of
+         * TexGenEnabled is not visible here -- confirm.
+         */
+        rmesa->TexGenEnabled &= ~(R200_TEXGEN_INPUT_MASK<<inputshift);
+        rmesa->TexGenNeedNormals[unit] = 0;
+        rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
+        rmesa->TexGenInputs &= ~(R200_TEXGEN_INPUT_MASK<<inputshift);
+
+        /* Enable bits changed: have texgen revalidated and the texture
+         * matrix state recomputed.
+         */
+        if (tmp != rmesa->TexGenEnabled) {
+           rmesa->recheck_texgen[unit] = GL_TRUE;
+           rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+        }
+      }
+   }
+}
+
+/* Prepare the 1D/2D texture currently bound to `unit' for hardware use:
+ * make sure it is laid out as a power-of-two texture and (re)upload its
+ * images when they are dirty.
+ *
+ * Returns GL_FALSE when the upload left the texture without a memory
+ * block, GL_TRUE otherwise.
+ */
+static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+
+   /* The object was last set up as a rectangle texture: switch it back
+    * and force a reload of its images.
+    */
+   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+      t->base.dirty_images[0] = ~0;
+   }
+
+   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+
+   /* Nothing to upload. */
+   if ( !t->base.dirty_images[0] )
+      return GL_TRUE;
+
+   R200_FIREVERTICES( rmesa );
+   r200SetTexImages( rmesa, tObj );
+   r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+
+   return t->base.memBlock ? GL_TRUE : GL_FALSE;
+}
+
+#if ENABLE_HW_3D_TEXTURE
+/* Prepare the 3D texture currently bound to `unit' for hardware use:
+ * make sure it is laid out as a power-of-two texture and (re)upload its
+ * images when they are dirty.
+ *
+ * Returns GL_FALSE when the upload left the texture without a memory
+ * block, GL_TRUE otherwise.
+ */
+static GLboolean enable_tex_3d( GLcontext *ctx, int unit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+
+   /* The object was last set up as a rectangle texture: switch it back
+    * and force a reload of its images.
+    */
+   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+      t->base.dirty_images[0] = ~0;
+   }
+
+   ASSERT(tObj->Target == GL_TEXTURE_3D);
+
+   /* Nothing to upload. */
+   if ( !t->base.dirty_images[0] )
+      return GL_TRUE;
+
+   R200_FIREVERTICES( rmesa );
+   r200SetTexImages( rmesa, tObj );
+   r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+
+   return t->base.memBlock ? GL_TRUE : GL_FALSE;
+}
+#endif
+
+/* Prepare the cube map currently bound to `unit' for hardware use: make
+ * sure it is laid out as a power-of-two texture, lay out its memory once
+ * if any face is dirty, then upload every dirty face.
+ *
+ * Returns GL_FALSE when the texture has no memory block afterwards (the
+ * caller then uses a software fallback), GL_TRUE otherwise.
+ */
+static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+   GLboolean anyDirty = GL_FALSE;
+   GLuint face;
+
+   /* The object was last set up as a rectangle texture: switch it back
+    * and force a reload of all six faces.
+    */
+   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+      for (face = 0; face < 6; face++)
+         t->base.dirty_images[face] = ~0;
+   }
+
+   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+
+   for (face = 0; face < 6; face++) {
+      if (t->base.dirty_images[face]) {
+         anyDirty = GL_TRUE;
+         break;
+      }
+   }
+
+   if (anyDirty) {
+      /* flush, then lay out memory space once for all faces */
+      R200_FIREVERTICES( rmesa );
+      r200SetTexImages( rmesa, tObj );
+   }
+
+   /* upload (per face) */
+   for (face = 0; face < 6; face++) {
+      if (!t->base.dirty_images[face])
+         continue;
+      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face );
+   }
+
+   /* No memory block means the texmem allocation failed: s/w fallback */
+   return t->base.memBlock ? GL_TRUE : GL_FALSE;
+}
+
+/* Prepare the rectangle texture currently bound to `unit' for hardware
+ * use: make sure it is laid out as a non-power-of-two texture and
+ * (re)upload its image when dirty.
+ *
+ * Returns GL_FALSE when the upload left the texture without a memory
+ * block and AGP client texturing is not preferred, GL_TRUE otherwise.
+ */
+static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+
+   /* The object was last set up as a power-of-two texture: switch it to
+    * the NPOT layout and force a reload of its image.
+    */
+   if ((t->pp_txformat & R200_TXFORMAT_NON_POWER2) == 0) {
+      t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
+      t->base.dirty_images[0] = ~0;
+   }
+
+   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+
+   /* Nothing to upload. */
+   if ( !t->base.dirty_images[0] )
+      return GL_TRUE;
+
+   R200_FIREVERTICES( rmesa );
+   r200SetTexImages( rmesa, tObj );
+   r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+
+   if ( t->base.memBlock || rmesa->prefer_agp_client_texturing )
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+
+/* Target-independent part of enabling a texture unit: track which
+ * driver texture object is bound to the unit, switch the unit on in the
+ * hardware state, import dirty texture-object state, revalidate texgen
+ * and update the texture environment when the cached format / env mode
+ * no longer match.
+ *
+ * Returns GL_FALSE when a fallback is needed (bordered texture,
+ * unsupported texture environment, or border-mode fallback flagged on
+ * the texture object), GL_TRUE otherwise.
+ */
+static GLboolean update_tex_common( GLcontext *ctx, int unit )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *tObj = texUnit->_Current;
+   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+   GLenum baseFormat;
+
+   /* Textures with a border are not handled: fallback. */
+   if ( tObj->Image[tObj->BaseLevel]->Border > 0 )
+       return GL_FALSE;
+
+   /* A different texture object than last time: move the "bound" mark
+    * from the old object to the new one and schedule a state import.
+    */
+   if ( rmesa->state.texture.unit[unit].texobj != t ) {
+      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+        rmesa->state.texture.unit[unit].texobj->base.bound &=
+            ~(1UL << unit);
+      }
+
+      rmesa->state.texture.unit[unit].texobj = t;
+      t->base.bound |= (1UL << unit);
+      t->dirty_state |= 1<<unit;
+      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+   }
+
+   /* Switch the unit on.  This used to be limited to the "newly
+    * enabled" case (enable bit not yet set in CTX_PP_CNTL), but that
+    * test is forced to true, so the block always runs.
+    */
+   {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (R200_TEX_0_ENABLE |
+                                        R200_TEX_BLEND_0_ENABLE) << unit;
+
+      R200_STATECHANGE( rmesa, vtx );
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
+
+      rmesa->recheck_texgen[unit] = GL_TRUE;
+   }
+
+   if ( (t->dirty_state & (1<<unit)) != 0 ) {
+      import_tex_obj_state( rmesa, unit, t );
+   }
+
+   if ( rmesa->recheck_texgen[unit] ) {
+      const GLboolean need_fallback = !r200_validate_texgen( ctx, unit );
+      TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), need_fallback);
+      rmesa->recheck_texgen[unit] = 0;
+      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+
+   /* Recompute the texture environment only when the cached
+    * format / env mode pair is out of date.  The cache is written
+    * before the call because r200UpdateTextureEnv may reset it.
+    */
+   baseFormat = tObj->Image[tObj->BaseLevel]->Format;
+   if ( rmesa->state.texture.unit[unit].format != baseFormat ||
+       rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
+      rmesa->state.texture.unit[unit].format = baseFormat;
+      rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
+      if ( !r200UpdateTextureEnv( ctx, unit ) )
+        return GL_FALSE;
+   }
+
+   FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
+   return !t->border_fallback;
+}
+
+
+
+static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
+{
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+   if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
+      return (enable_tex_rect( ctx, unit ) &&
+             update_tex_common( ctx, unit ));
+   }
+   else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
+      return (enable_tex_2d( ctx, unit ) &&
+             update_tex_common( ctx, unit ));
+   }
+#if ENABLE_HW_3D_TEXTURE
+   else if ( texUnit->_ReallyEnabled & (TEXTURE_3D_BIT) ) {
+      return (enable_tex_3d( ctx, unit ) &&
+             update_tex_common( ctx, unit ));
+   }
+#endif
+   else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
+      return (enable_tex_cube( ctx, unit ) &&
+             update_tex_common( ctx, unit ));
+   }
+   else if ( texUnit->_ReallyEnabled ) {
+      return GL_FALSE;
+   }
+   else {
+      disable_tex( ctx, unit );
+      return GL_TRUE;
+   }
+}
+
+
+/* Validate texture units 0 and 1, hand the outcome to the
+ * R200_FALLBACK_TEXTURE fallback, and then apply two hardware hang
+ * workarounds to the resulting register state.
+ */
+void r200UpdateTextureState( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLboolean ok;
+   GLuint dbg;
+
+   /* Note the short-circuit: unit 1 is not visited when unit 0 fails. */
+   ok = (r200UpdateTextureUnit( ctx, 0 ) &&
+        r200UpdateTextureUnit( ctx, 1 ));
+
+   FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
+
+   if (rmesa->TclFallback)
+      r200ChooseVertexState( ctx );
+
+   /*
+    * T0 hang workaround -------------
+    *
+    * When unit 0 is the only enabled unit and its min filter field is
+    * above R200_MIN_FILTER_LINEAR, unit 1 is switched on as well, with
+    * the TEXOBJ_TXFORMAT_MASK bits of its TXFORMAT cleared and bit
+    * 0x08000000 set.  Otherwise that bit is cleared again if unit 1 is
+    * enabled and still carries it.
+    */
+#if 1
+   if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
+       (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
+
+      R200_STATECHANGE(rmesa, ctx);
+      R200_STATECHANGE(rmesa, tex[1]);
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
+      rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+      rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
+   }
+   else {
+      if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
+         (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
+        R200_STATECHANGE(rmesa, tex[1]);
+        rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
+      }
+   }
+#endif
+
+#if 1
+   /*
+    * Texture cache LRU hang workaround -------------
+    *
+    * Build a TAM_DEBUG3 value: bit 0x02 is set when unit 0 is enabled
+    * and bit 0x04 of its min filter field is clear; bit 0x04 is set
+    * under the same condition for unit 1.  The register is only
+    * touched when the value differs from the one already stored.
+    */
+   dbg = 0x0;
+   if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_0_ENABLE) &&
+       ((((rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 
+         0x04) == 0)))
+   {
+      dbg |= 0x02;
+   }
+
+   if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
+       ((((rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 
+         0x04) == 0)))
+   {
+      dbg |= 0x04;
+   }
+
+   if (dbg != rmesa->hw.tam.cmd[TAM_DEBUG3]) {
+      R200_STATECHANGE( rmesa, tam );
+      rmesa->hw.tam.cmd[TAM_DEBUG3] = dbg;
+      if (0) printf("TEXCACHE LRU HANG WORKAROUND %x\n", dbg);
+   }
+#endif
+}
+
+/*
+  also tests for higher texunits:
+
+       ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_2_ENABLE) &&
+       ((((rmesa->hw.tex[2].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 0x04) == 0)) ||
+       ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_4_ENABLE) &&
+       ((((rmesa->hw.tex[4].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 0x04) == 0)))
+
+       ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_3_ENABLE) &&
+       ((((rmesa->hw.tex[3].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 0x04) == 0)) ||
+       ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_5_ENABLE) &&
+       ((((rmesa->hw.tex[5].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & 0x04) == 0)))
+
+*/
diff --git a/src/mesa/drivers/dri/r200/r200_vtxfmt.c b/src/mesa/drivers/dri/r200/r200_vtxfmt.c
new file mode 100644 (file)
index 0000000..5a4f59d
--- /dev/null
@@ -0,0 +1,1125 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_tex.h"
+#include "r200_tcl.h"
+#include "r200_swtcl.h"
+#include "r200_vtxfmt.h"
+
+#include "api_noop.h"
+#include "api_arrayelt.h"
+#include "context.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "glapi.h"
+#include "colormac.h"
+#include "light.h"
+#include "state.h"
+#include "vtxfmt.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_array_api.h"
+
+static void r200VtxFmtFlushVertices( GLcontext *, GLuint );
+
+/* Debug helper: walk the dynfn list headed by 'l' and, when at least
+ * one entry was found, print "<name>: <count>" to stderr.
+ */
+static void count_func( const char *name,  struct dynfn *l )
+{
+   struct dynfn *entry;
+   int total = 0;
+
+   foreach (entry, l) {
+      total++;
+   }
+
+   if (total != 0)
+      fprintf(stderr, "%s: %d\n", name, total );
+}
+
+/* Debug helper: run count_func() over every list in
+ * rmesa->vb.dfn_cache, so that each non-empty list is reported on
+ * stderr together with its entry count.
+ *
+ * Note that the SecondaryColor* labels are printed without the EXT
+ * suffix carried by the corresponding dfn_cache members.
+ */
+static void count_funcs( r200ContextPtr rmesa )
+{
+   count_func( "Vertex2f", &rmesa->vb.dfn_cache.Vertex2f );
+   count_func( "Vertex2fv", &rmesa->vb.dfn_cache.Vertex2fv );
+   count_func( "Vertex3f", &rmesa->vb.dfn_cache.Vertex3f );
+   count_func( "Vertex3fv", &rmesa->vb.dfn_cache.Vertex3fv );
+   count_func( "Color4ub", &rmesa->vb.dfn_cache.Color4ub );
+   count_func( "Color4ubv", &rmesa->vb.dfn_cache.Color4ubv );
+   count_func( "Color3ub", &rmesa->vb.dfn_cache.Color3ub );
+   count_func( "Color3ubv", &rmesa->vb.dfn_cache.Color3ubv );
+   count_func( "Color4f", &rmesa->vb.dfn_cache.Color4f );
+   count_func( "Color4fv", &rmesa->vb.dfn_cache.Color4fv );
+   count_func( "Color3f", &rmesa->vb.dfn_cache.Color3f );
+   count_func( "Color3fv", &rmesa->vb.dfn_cache.Color3fv );
+   count_func( "SecondaryColor3f", &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   count_func( "SecondaryColor3fv", &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   count_func( "SecondaryColor3ub", &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   count_func( "SecondaryColor3ubv", &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   count_func( "Normal3f", &rmesa->vb.dfn_cache.Normal3f );
+   count_func( "Normal3fv", &rmesa->vb.dfn_cache.Normal3fv );
+   count_func( "TexCoord2f", &rmesa->vb.dfn_cache.TexCoord2f );
+   count_func( "TexCoord2fv", &rmesa->vb.dfn_cache.TexCoord2fv );
+   count_func( "TexCoord1f", &rmesa->vb.dfn_cache.TexCoord1f );
+   count_func( "TexCoord1fv", &rmesa->vb.dfn_cache.TexCoord1fv );
+   count_func( "MultiTexCoord2fARB", &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   count_func( "MultiTexCoord2fvARB", &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   count_func( "MultiTexCoord1fARB", &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   count_func( "MultiTexCoord1fvARB", &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+}
+
+
+/* Copy the attribute values held behind the rmesa->vb pointers back
+ * into ctx->Current for every attribute that is part of the active
+ * vertex format (vtxfmt_0 / vtxfmt_1), then clear the
+ * FLUSH_UPDATE_CURRENT bit in ctx->Driver.NeedFlush.
+ *
+ * The caller must only invoke this while FLUSH_UPDATE_CURRENT is set
+ * (asserted below).
+ */
+void r200_copy_to_current( GLcontext *ctx ) 
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT);
+
+   if (rmesa->vb.vtxfmt_0 & R200_VTX_N0) {
+      ctx->Current.Attrib[VERT_ATTRIB_NORMAL][0] = rmesa->vb.normalptr[0];
+      ctx->Current.Attrib[VERT_ATTRIB_NORMAL][1] = rmesa->vb.normalptr[1];
+      ctx->Current.Attrib[VERT_ATTRIB_NORMAL][2] = rmesa->vb.normalptr[2];
+   }
+
+   /* Primary color: packed ubytes are converted to float; the float
+    * RGB format leaves the current alpha untouched.
+    */
+   switch( VTX_COLOR(rmesa->vb.vtxfmt_0, 0) ) {
+   case R200_VTX_PK_RGBA:
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0] = UBYTE_TO_FLOAT( rmesa->vb.colorptr->red );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1] = UBYTE_TO_FLOAT( rmesa->vb.colorptr->green );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2] = UBYTE_TO_FLOAT( rmesa->vb.colorptr->blue );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = UBYTE_TO_FLOAT( rmesa->vb.colorptr->alpha );
+      break;
+
+   case R200_VTX_FP_RGB:
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0] = rmesa->vb.floatcolorptr[0];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1] = rmesa->vb.floatcolorptr[1];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2] = rmesa->vb.floatcolorptr[2];
+      break;
+
+   case R200_VTX_FP_RGBA:
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0] = rmesa->vb.floatcolorptr[0];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1] = rmesa->vb.floatcolorptr[1];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2] = rmesa->vb.floatcolorptr[2];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = rmesa->vb.floatcolorptr[3];
+      break;
+      
+   default:
+      break;
+   }
+      
+   /* Secondary color: only red, green and blue are written back. */
+   if (VTX_COLOR(rmesa->vb.vtxfmt_0, 1) == R200_VTX_PK_RGBA) {
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR1][0] = UBYTE_TO_FLOAT( rmesa->vb.specptr->red );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR1][1] = UBYTE_TO_FLOAT( rmesa->vb.specptr->green );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR1][2] = UBYTE_TO_FLOAT( rmesa->vb.specptr->blue );
+   } 
+
+   /* Texture coordinates: the first two components come from the vb
+    * pointers; the third and fourth are forced to 0 and 1.
+    */
+   if (rmesa->vb.vtxfmt_1 & (7 << R200_VTX_TEX0_COMP_CNT_SHIFT)) {
+      ctx->Current.Attrib[VERT_ATTRIB_TEX0][0] = rmesa->vb.texcoordptr[0][0];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX0][1] = rmesa->vb.texcoordptr[0][1];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] = 0.0F;
+      ctx->Current.Attrib[VERT_ATTRIB_TEX0][3] = 1.0F;
+   }
+
+   if (rmesa->vb.vtxfmt_1 & (7 << R200_VTX_TEX1_COMP_CNT_SHIFT)) {
+      ctx->Current.Attrib[VERT_ATTRIB_TEX1][0] = rmesa->vb.texcoordptr[1][0];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX1][1] = rmesa->vb.texcoordptr[1][1];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] = 0.0F;
+      ctx->Current.Attrib[VERT_ATTRIB_TEX1][3] = 1.0F;
+   }
+
+   ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+}
+
+/* Indexed by GL primitive mode (GL_POINTS .. GL_POLYGON).  A non-zero
+ * entry allows flush_prims() to merge two back-to-back primlist
+ * entries of that mode into one; zero entries (strips, loops, fans,
+ * polygons) are never merged.
+ */
+static GLboolean discreet_gl_prim[GL_POLYGON+1] = {
+   1,                          /* 0 points */
+   1,                          /* 1 lines */
+   0,                          /* 2 line_strip */
+   0,                          /* 3 line_loop */
+   1,                          /* 4 tris */
+   0,                          /* 5 tri_fan */
+   0,                          /* 6 tri_strip */
+   1,                          /* 7 quads */
+   0,                          /* 8 quadstrip */
+   0,                          /* 9 poly */
+};
+
+/* Emit every primitive recorded in rmesa->vb.primlist and empty the
+ * list.
+ *
+ * A private copy of the current dma region is used as the single
+ * array-of-structures component for the emitted primitives, while
+ * rmesa->dma.current itself is advanced past the vertices written so
+ * far.  rmesa->dma.flush is cleared.
+ */
+static void flush_prims( r200ContextPtr rmesa )
+{
+   int i,j;
+   struct r200_dma_region tmp = rmesa->dma.current;
+   
+   /* tmp is an extra user of the buffer; it is released at the end of
+    * this function.
+    */
+   tmp.buf->refcount++;
+   tmp.aos_size = rmesa->vb.vertex_size;
+   tmp.aos_stride = rmesa->vb.vertex_size;
+   tmp.aos_start = GET_START(&tmp);
+
+   /* Skip over the vertices consumed since the counter was last reset
+    * (vertex_size is counted in dwords, hence the * 4).
+    */
+   rmesa->dma.current.ptr = rmesa->dma.current.start += 
+      (rmesa->vb.initial_counter - rmesa->vb.counter) * 
+      rmesa->vb.vertex_size * 4; 
+
+   rmesa->tcl.vertex_format = rmesa->vb.vtxfmt_0;
+   rmesa->tcl.aos_components[0] = &tmp;
+   rmesa->tcl.nr_aos_components = 1;
+   rmesa->dma.flush = 0;
+
+   /* Optimize the primitive list:
+    *
+    * Entry i is folded into entry j when both have the same mode, the
+    * mode is flagged in discreet_gl_prim[] and i starts exactly where
+    * j ends.  Otherwise entry i becomes the new entry j (compacting
+    * the list in place).
+    */
+   if (rmesa->vb.nrprims > 1) {
+      for (j = 0, i = 1 ; i < rmesa->vb.nrprims; i++) {
+        int pj = rmesa->vb.primlist[j].prim & 0xf;
+        int pi = rmesa->vb.primlist[i].prim & 0xf;
+      
+        if (pj == pi && discreet_gl_prim[pj] &&
+            rmesa->vb.primlist[i].start == rmesa->vb.primlist[j].end) {
+           rmesa->vb.primlist[j].end = rmesa->vb.primlist[i].end;
+        }
+        else {
+           j++;
+           if (j != i) rmesa->vb.primlist[j] = rmesa->vb.primlist[i];
+        }
+      }
+      rmesa->vb.nrprims = j+1;
+   }
+
+   /* Push the vertex format to the hardware state if it changed. */
+   if (rmesa->vb.vtxfmt_0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0] ||
+       rmesa->vb.vtxfmt_1 != rmesa->hw.vtx.cmd[VTX_VTXFMT_1]) { 
+      R200_STATECHANGE( rmesa, vtx ); 
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = rmesa->vb.vtxfmt_0;
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = rmesa->vb.vtxfmt_1;
+   } 
+
+
+   for (i = 0 ; i < rmesa->vb.nrprims; i++) {
+      if (R200_DEBUG & DEBUG_PRIMS)
+        fprintf(stderr, "vtxfmt prim %d: %s %d..%d\n", i,
+                _mesa_lookup_enum_by_nr( rmesa->vb.primlist[i].prim & 
+                                         PRIM_MODE_MASK ),
+                rmesa->vb.primlist[i].start,
+                rmesa->vb.primlist[i].end);
+
+      /* Empty ranges are not emitted. */
+      if (rmesa->vb.primlist[i].start < rmesa->vb.primlist[i].end)
+        r200EmitPrimitive( rmesa->glCtx,
+                           rmesa->vb.primlist[i].start,
+                           rmesa->vb.primlist[i].end,
+                           rmesa->vb.primlist[i].prim );
+   }
+
+   rmesa->vb.nrprims = 0;
+   r200ReleaseDmaRegion( rmesa, &tmp, __FUNCTION__ );
+}
+
+
+/* Open the entry at the tail of the primitive list: store the
+ * primitive mode/flags and, as its start, the current vertex position
+ * in the buffer (initial_counter - counter).  nrprims itself is not
+ * advanced here.
+ */
+static void start_prim( r200ContextPtr rmesa, GLuint mode )
+{
+   if (R200_DEBUG & DEBUG_VFMT) {
+      fprintf(stderr, "%s %d\n", __FUNCTION__,
+             rmesa->vb.initial_counter - rmesa->vb.counter);
+   }
+
+   rmesa->vb.primlist[rmesa->vb.nrprims].prim = mode;
+   rmesa->vb.primlist[rmesa->vb.nrprims].start =
+      rmesa->vb.initial_counter - rmesa->vb.counter;
+}
+
+/* Close the primitive currently being built: OR 'flags' into its prim
+ * word, record the current vertex position as its end and advance
+ * nrprims.  Once the list holds R200_MAX_PRIMS entries it is flushed.
+ * Does nothing (beyond debug output) outside Begin/End.
+ */
+static void note_last_prim( r200ContextPtr rmesa, GLuint flags )
+{
+   if (R200_DEBUG & DEBUG_VFMT) {
+      fprintf(stderr, "%s %d\n", __FUNCTION__,
+             rmesa->vb.initial_counter - rmesa->vb.counter);
+   }
+
+   /* GL_POLYGON+1 marks "outside Begin/End": no open primitive. */
+   if (rmesa->vb.prim[0] == GL_POLYGON+1)
+      return;
+
+   rmesa->vb.primlist[rmesa->vb.nrprims].prim |= flags;
+   rmesa->vb.primlist[rmesa->vb.nrprims].end =
+      rmesa->vb.initial_counter - rmesa->vb.counter;
+
+   rmesa->vb.nrprims++;
+   if (rmesa->vb.nrprims == R200_MAX_PRIMS)
+      flush_prims( rmesa );
+}
+
+
+/* Copy vertex 'n' of the primitive currently being built (counted
+ * from that primitive's start) out of the current dma region into
+ * 'dst'.  rmesa->vb.vertex_size floats are written to 'dst'.
+ */
+static void copy_vertex( r200ContextPtr rmesa, GLuint n, GLfloat *dst )
+{
+   GLfloat *from = (GLfloat *)(rmesa->dma.current.address +
+                              rmesa->dma.current.ptr +
+                              (rmesa->vb.primlist[rmesa->vb.nrprims].start + n) *
+                              rmesa->vb.vertex_size * 4);
+   GLuint k = 0;
+
+   if (R200_DEBUG & DEBUG_VFMT) {
+      fprintf(stderr, "copy_vertex %d\n", rmesa->vb.primlist[rmesa->vb.nrprims].start + n);
+   }
+
+   /* Element-wise float copy, one dword per component. */
+   while (k < rmesa->vb.vertex_size) {
+      dst[k] = from[k];
+      k++;
+   }
+}
+
+/* Save the vertices of the unfinished primitive that have to be fed
+ * in again when that primitive is restarted (see the callers, which
+ * replay or re-emit them).  Depending on the primitive mode these are
+ * the leftover vertices of an incomplete point/line/triangle/quad, or
+ * the vertices a strip, loop, fan or polygon needs to continue.
+ *
+ * Returns the number of vertices stored in 'tmp' (0 to 3).
+ *
+ * NOTE: This actually reads the copied vertices back from uncached
+ * memory.  Could also use the counter/notify mechanism to populate
+ * tmp on the fly as vertices are generated.  
+ */
+static GLuint copy_dma_verts( r200ContextPtr rmesa, GLfloat (*tmp)[15] )
+{
+   GLuint keep, k;
+   GLuint nr = (rmesa->vb.initial_counter - rmesa->vb.counter) -
+      rmesa->vb.primlist[rmesa->vb.nrprims].start;
+
+   if (R200_DEBUG & DEBUG_VFMT) {
+      fprintf(stderr, "%s %d verts\n", __FUNCTION__, nr);
+   }
+
+   /* Pick how many trailing vertices to carry over; modes with their
+    * own copy pattern return directly from the switch.
+    */
+   switch( rmesa->vb.prim[0] )
+   {
+   case GL_POINTS:
+      return 0;
+
+   case GL_LINE_LOOP:
+   case GL_TRIANGLE_FAN:
+   case GL_POLYGON:
+      /* Keep the very first vertex plus the most recent one. */
+      if (nr == 0)
+        return 0;
+      copy_vertex( rmesa, 0, tmp[0] );
+      if (nr == 1)
+        return 1;
+      copy_vertex( rmesa, nr-1, tmp[1] );
+      return 2;
+
+   case GL_LINES:
+      keep = nr&1;
+      break;
+   case GL_TRIANGLES:
+      keep = nr%3;
+      break;
+   case GL_QUADS:
+      keep = nr&3;
+      break;
+   case GL_LINE_STRIP:
+      keep = (nr == 0) ? 0 : 1;
+      break;
+   case GL_TRIANGLE_STRIP:
+      keep = MIN2( nr, 2 );
+      break;
+   case GL_QUAD_STRIP:
+      if (nr < 2)
+        keep = nr;
+      else
+        keep = 2 + (nr&1);
+      break;
+
+   default:
+      assert(0);
+      return 0;
+   }
+
+   /* Copy the last 'keep' vertices, oldest first. */
+   for (k = 0 ; k < keep ; k++)
+      copy_vertex( rmesa, nr-keep+k, tmp[k] );
+   return k;
+}
+
+/* Leave the vtxfmt fast path while no Begin/End pair is open.
+ *
+ * Pending vertices are flushed, outstanding Mesa state is updated,
+ * _tnl_wakeup_exec() is called and the driver's FlushVertices hook is
+ * pointed back at r200FlushVertices.  Finally the vb is marked as
+ * fallen back and no longer installed.
+ *
+ * 'caller' is only used in the debug message.
+ */
+static void VFMT_FALLBACK_OUTSIDE_BEGIN_END( const char *caller )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+
+   if (ctx->Driver.NeedFlush) 
+      r200VtxFmtFlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   if (ctx->NewState)
+      _mesa_update_state( ctx ); /* clear state so fell_back sticks */
+
+   _tnl_wakeup_exec( ctx );
+   ctx->Driver.FlushVertices = r200FlushVertices;
+
+   /* The flush above is expected to have left no dma flush pending. */
+   assert( rmesa->dma.flush == 0 );
+   rmesa->vb.fell_back = GL_TRUE;
+   rmesa->vb.installed = GL_FALSE;
+}
+
+
+/* Leave the vtxfmt fast path, possibly in the middle of a Begin/End
+ * pair.
+ *
+ * Outside Begin/End this simply defers to
+ * VFMT_FALLBACK_OUTSIDE_BEGIN_END().  Inside Begin/End the vertices
+ * of the unfinished primitive that are still needed are copied out of
+ * dma, the buffered primitives are flushed, _tnl_wakeup_exec() is
+ * called, and the primitive is restarted through the regular GL entry
+ * points: glBegin(), the saved vertices, and finally the current
+ * (not yet emitted) attribute values.
+ *
+ * 'caller' is only used in the debug message.
+ */
+static void VFMT_FALLBACK( const char *caller )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Room for the at most 3 vertices copy_dma_verts() hands back, each
+    * up to 15 dwords (3 position + 3 normal + 4 color + 1 specular +
+    * 2 + 2 texcoords, the largest layout check_vtx_fmt() builds).
+    */
+   GLfloat tmp[3][15];
+   GLuint i, prim;
+   /* Snapshot of the vertex format the saved vertices were written in. */
+   GLuint ind0 = rmesa->vb.vtxfmt_0;
+   GLuint ind1 = rmesa->vb.vtxfmt_1;
+   GLuint nrverts;
+   GLfloat alpha = 1.0;
+
+   if (R200_DEBUG & (DEBUG_FALLBACKS|DEBUG_VFMT))
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) {
+      VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );
+      return;
+   }
+
+   /* Copy vertices out of dma:
+    */
+   nrverts = copy_dma_verts( rmesa, tmp );
+
+   /* Finish the prim at this point:
+    */
+   note_last_prim( rmesa, 0 );
+   flush_prims( rmesa );
+
+   /* Update ctx->Driver.CurrentExecPrimitive and swap in swtnl. 
+    */
+   prim = rmesa->vb.prim[0];
+   ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1;
+   _tnl_wakeup_exec( ctx );
+   ctx->Driver.FlushVertices = r200FlushVertices;
+
+   assert(rmesa->dma.flush == 0);
+   rmesa->vb.fell_back = GL_TRUE;
+   rmesa->vb.installed = GL_FALSE;
+   /* Reopen the interrupted primitive through the GL entry point. */
+   glBegin( prim );
+   
+   if (rmesa->vb.installed_color_3f_sz == 4)
+      alpha = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3];
+
+   /* Replay saved vertices
+    *
+    * Each saved vertex starts with 3 position dwords; 'offset' walks
+    * over the attributes that follow, in the order check_vtx_fmt()
+    * laid them out.  The position is sent last so that it picks up
+    * the attributes just set.
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      GLuint offset = 3;
+      if (ind0 & R200_VTX_N0) {
+        glNormal3fv( &tmp[i][offset] ); 
+        offset += 3;
+      }
+
+      if (VTX_COLOR(ind0, 0) == R200_VTX_PK_RGBA) {
+        /* Packed color: four ubytes stored in a single dword. */
+        glColor4ubv( (GLubyte *)&tmp[i][offset] ); 
+        offset++;
+      }
+      else if (VTX_COLOR(ind0, 0) == R200_VTX_FP_RGBA) {
+        glColor4fv( &tmp[i][offset] ); 
+        offset+=4;
+      } 
+      else if (VTX_COLOR(ind0, 0) == R200_VTX_FP_RGB) {
+        glColor3fv( &tmp[i][offset] ); 
+        offset+=3;
+      }
+
+      if (VTX_COLOR(ind0, 1) == R200_VTX_PK_RGBA) {
+        _glapi_Dispatch->SecondaryColor3ubvEXT( (GLubyte *)&tmp[i][offset] ); 
+        offset++;
+      }
+
+      if (ind1 & (7 << R200_VTX_TEX0_COMP_CNT_SHIFT)) {
+        glTexCoord2fv( &tmp[i][offset] ); 
+        offset += 2;
+      }
+
+      if (ind1 & (7 << R200_VTX_TEX1_COMP_CNT_SHIFT)) {
+        glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, &tmp[i][offset] );
+        offset += 2;
+      }
+
+      glVertex3fv( &tmp[i][0] );
+   }
+
+   /* Replay current vertex
+    *
+    * i.e. the attribute values set since the last emitted vertex,
+    * read through the rmesa->vb pointers.
+    */
+   if (ind0 & R200_VTX_N0) 
+      glNormal3fv( rmesa->vb.normalptr );
+
+   if (VTX_COLOR(ind0, 0) == R200_VTX_PK_RGBA) 
+         glColor4ub( rmesa->vb.colorptr->red, rmesa->vb.colorptr->green, rmesa->vb.colorptr->blue, rmesa->vb.colorptr->alpha );
+   else if (VTX_COLOR(ind0, 0) == R200_VTX_FP_RGBA) 
+      glColor4fv( rmesa->vb.floatcolorptr );
+   else if (VTX_COLOR(ind0, 0) == R200_VTX_FP_RGB) {
+      /* Re-issue with the alpha read from ctx->Current above when the
+       * installed 3f color size is 4 and that alpha is not 1.0.
+       */
+      if (rmesa->vb.installed_color_3f_sz == 4 && alpha != 1.0)
+        glColor4f( rmesa->vb.floatcolorptr[0],
+                   rmesa->vb.floatcolorptr[1],
+                   rmesa->vb.floatcolorptr[2],
+                   alpha );
+      else
+        glColor3fv( rmesa->vb.floatcolorptr );
+   }
+
+   if (VTX_COLOR(ind0, 1) == R200_VTX_PK_RGBA) 
+      _glapi_Dispatch->SecondaryColor3ubEXT( rmesa->vb.specptr->red, rmesa->vb.specptr->green, rmesa->vb.specptr->blue ); 
+
+   if (ind1 & (7 << R200_VTX_TEX0_COMP_CNT_SHIFT)) 
+      glTexCoord2fv( rmesa->vb.texcoordptr[0] );
+
+   if (ind1 & (7 << R200_VTX_TEX1_COMP_CNT_SHIFT)) 
+      glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, rmesa->vb.texcoordptr[1] );
+}
+
+
+
+/* Switch to a fresh dma buffer, carrying an unfinished primitive over
+ * into it.  Installed as rmesa->vb.notify.
+ *
+ * The vertices needed to continue the open primitive are copied out
+ * of the old buffer, the buffered primitives are fired, a new dma
+ * region is obtained, the counters are reset, and the primitive is
+ * restarted with the saved vertices written at the head of the new
+ * buffer.
+ */
+static void wrap_buffer( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Up to 3 saved vertices of up to 15 dwords each. */
+   GLfloat tmp[3][15];
+   GLuint i, nrverts;
+
+   if (R200_DEBUG & (DEBUG_VFMT|DEBUG_PRIMS))
+      fprintf(stderr, "%s %d\n", __FUNCTION__,
+             rmesa->vb.initial_counter - rmesa->vb.counter);
+
+   /* Don't deal with parity.
+    *
+    * If the open primitive holds an odd number of vertices, postpone
+    * the wrap: bump both counters by one and return.
+    * NOTE(review): this presumably relies on the slot held back by the
+    * counter-- done whenever the counter is (re)initialised -- confirm.
+    */
+   if ((((rmesa->vb.initial_counter - rmesa->vb.counter) -  
+        rmesa->vb.primlist[rmesa->vb.nrprims].start) & 1)) {
+      rmesa->vb.counter++;
+      rmesa->vb.initial_counter++;
+      return;
+   }
+
+   /* Copy vertices out of dma:
+    */
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) 
+      nrverts = 0;
+   else {
+      nrverts = copy_dma_verts( rmesa, tmp );
+
+      if (R200_DEBUG & DEBUG_VFMT)
+        fprintf(stderr, "%d vertices to copy\n", nrverts);
+   
+      /* Finish the prim at this point:
+       */
+      note_last_prim( rmesa, 0 );
+   }
+
+   /* Fire any buffered primitives
+    */
+   flush_prims( rmesa );
+
+   /* Get new buffer
+    */
+   r200RefillCurrentDmaRegion( rmesa );
+
+   /* Reset counter, dmaptr
+    *
+    * counter = number of whole vertices that fit in the region, minus
+    * one.
+    */
+   rmesa->vb.dmaptr = (int *)(rmesa->dma.current.ptr + rmesa->dma.current.address);
+   rmesa->vb.counter = (rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+      (rmesa->vb.vertex_size * 4);
+   rmesa->vb.counter--;
+   rmesa->vb.initial_counter = rmesa->vb.counter;
+   rmesa->vb.notify = wrap_buffer;
+
+   /* flush_prims() cleared this; re-arm it for the new buffer. */
+   rmesa->dma.flush = flush_prims;
+
+   /* Restart wrapped primitive:
+    */
+   if (rmesa->vb.prim[0] != GL_POLYGON+1)
+      start_prim( rmesa, rmesa->vb.prim[0] );
+
+
+   /* Reemit saved vertices
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      if (R200_DEBUG & DEBUG_VERTS) {
+        int j;
+        fprintf(stderr, "re-emit vertex %d to %p\n", i, rmesa->vb.dmaptr);
+        if (R200_DEBUG & DEBUG_VERBOSE)
+           for (j = 0 ; j < rmesa->vb.vertex_size; j++) 
+              fprintf(stderr, "\t%08x/%f\n", *(int*)&tmp[i][j], tmp[i][j]);
+      }
+
+      /* dmaptr is an int pointer, so it advances in dwords. */
+      memcpy( rmesa->vb.dmaptr, tmp[i], rmesa->vb.vertex_size * 4 );
+      rmesa->vb.dmaptr += rmesa->vb.vertex_size;
+      rmesa->vb.counter--;
+   }
+}
+
+
+
+/* Decide whether the current GL state can be handled by the vtxfmt
+ * fast path and, if so, choose the hardware vertex format and lay out
+ * rmesa->vb.vertex to match.
+ *
+ * Returns GL_FALSE when a TCL fallback is active, vtxfmt has already
+ * fallen back, a display list is being compiled, or an enabled
+ * texture unit without texgen has current texcoords whose third and
+ * fourth components are not 0 and 1.  Returns GL_TRUE otherwise,
+ * after updating vtxfmt_0/vtxfmt_1, vertex_size and the attribute
+ * pointers, and reinstalling the exec vtxfmt if vtxfmt_0 changed.
+ */
+static GLboolean check_vtx_fmt( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint ind0 = R200_VTX_Z0;
+   GLuint ind1 = 0;
+
+   if (rmesa->TclFallback || rmesa->vb.fell_back || ctx->CompileFlag)
+      return GL_FALSE;
+   
+   /* ctx->Current is read below, so bring it up to date first. */
+   if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) 
+      ctx->Driver.FlushVertices( ctx, FLUSH_UPDATE_CURRENT );
+   
+   /* Make all this event-driven:
+    */
+   if (ctx->Light.Enabled) {
+      ind0 |= R200_VTX_N0;
+
+      /* TODO: make this data driven: If we receive only ubytes, send
+       * color as ubytes.  Also check if converting (with free
+       * checking for overflow) is cheaper than sending floats
+       * directly.
+       */
+      if (ctx->Light.ColorMaterialEnabled) 
+        ind0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT;
+      else
+        ind0 |= R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT;
+   }
+   else {
+      /* TODO: make this data driven?
+       */
+      ind0 |= R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT;
+        
+      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+        ind0 |= R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT;
+      }
+   }
+
+   /* Texture unit 0: with texgen only a normal may be needed;
+    * otherwise two texcoord components are sent, which is only
+    * possible while the current third/fourth components are 0 and 1.
+    */
+   if (ctx->Texture.Unit[0]._ReallyEnabled) {
+      if (ctx->Texture.Unit[0].TexGenEnabled) {
+        if (rmesa->TexGenNeedNormals[0]) {
+           ind0 |= R200_VTX_N0;
+        }
+      } else {
+        if (ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] != 0.0F ||
+            ctx->Current.Attrib[VERT_ATTRIB_TEX0][3] != 1.0) {
+           if (R200_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+              fprintf(stderr, "%s: rq0\n", __FUNCTION__);
+           return GL_FALSE;
+        }
+        ind1 |= 2 << R200_VTX_TEX0_COMP_CNT_SHIFT;
+      }
+   }
+
+   /* Texture unit 1: same rules as unit 0. */
+   if (ctx->Texture.Unit[1]._ReallyEnabled) {
+      if (ctx->Texture.Unit[1].TexGenEnabled) {
+        if (rmesa->TexGenNeedNormals[1]) {
+           ind0 |= R200_VTX_N0;
+        }
+      } else {
+        if (ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] != 0.0F ||
+            ctx->Current.Attrib[VERT_ATTRIB_TEX1][3] != 1.0) {
+           if (R200_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+              fprintf(stderr, "%s: rq1\n", __FUNCTION__);
+           return GL_FALSE;
+        }
+        ind1 |= 2 << R200_VTX_TEX1_COMP_CNT_SHIFT;
+      }
+   }
+
+   if (R200_DEBUG & (DEBUG_VFMT|DEBUG_STATE))
+      fprintf(stderr, "%s: format: 0x%x, 0x%x\n", __FUNCTION__, ind0, ind1 );
+
+   R200_NEWPRIM(rmesa);
+   rmesa->vb.vtxfmt_0 = ind0;
+   rmesa->vb.vtxfmt_1 = ind1;
+   rmesa->vb.prim = &ctx->Driver.CurrentExecPrimitive;
+
+   /* Defaults: 3 dwords of position, and attribute pointers aimed at
+    * ctx->Current (or NULL for the packed colors).  Attributes that
+    * are part of the format are redirected into rmesa->vb.vertex
+    * below.
+    */
+   rmesa->vb.vertex_size = 3;
+   rmesa->vb.normalptr = ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
+   rmesa->vb.colorptr = NULL;
+   rmesa->vb.floatcolorptr = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+   rmesa->vb.specptr = NULL;
+   rmesa->vb.floatspecptr = ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
+   rmesa->vb.texcoordptr[0] = ctx->Current.Attrib[VERT_ATTRIB_TEX0];
+   rmesa->vb.texcoordptr[1] = ctx->Current.Attrib[VERT_ATTRIB_TEX1];
+
+   /* Run through and initialize the vertex components in the order
+    * the hardware understands:
+    *
+    * Each present attribute gets the next slot(s) of rmesa->vb.vertex,
+    * is seeded from ctx->Current, and grows vertex_size by its size in
+    * dwords.
+    */
+   if (ind0 & R200_VTX_N0) {
+      rmesa->vb.normalptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 3;
+      rmesa->vb.normalptr[0] = ctx->Current.Attrib[VERT_ATTRIB_NORMAL][0];
+      rmesa->vb.normalptr[1] = ctx->Current.Attrib[VERT_ATTRIB_NORMAL][1];
+      rmesa->vb.normalptr[2] = ctx->Current.Attrib[VERT_ATTRIB_NORMAL][2];
+   }
+
+   if (VTX_COLOR(ind0, 0) == R200_VTX_PK_RGBA) {
+      rmesa->vb.colorptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].color;
+      rmesa->vb.vertex_size += 1;
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.colorptr->red,   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.colorptr->green, ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.colorptr->blue,  ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.colorptr->alpha, ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] );
+   }
+   else if (VTX_COLOR(ind0, 0) == R200_VTX_FP_RGBA) {
+      rmesa->vb.floatcolorptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 4;
+      rmesa->vb.floatcolorptr[0] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0];
+      rmesa->vb.floatcolorptr[1] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1];
+      rmesa->vb.floatcolorptr[2] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2];
+      rmesa->vb.floatcolorptr[3] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3];
+   }
+   else if (VTX_COLOR(ind0, 0) == R200_VTX_FP_RGB) {
+      rmesa->vb.floatcolorptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 3;
+      rmesa->vb.floatcolorptr[0] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0];
+      rmesa->vb.floatcolorptr[1] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1];
+      rmesa->vb.floatcolorptr[2] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2];
+   }   
+   
+   if (VTX_COLOR(ind0, 1) == R200_VTX_PK_RGBA) {
+      rmesa->vb.specptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].color;
+      rmesa->vb.vertex_size += 1;
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.specptr->red,   ctx->Current.Attrib[VERT_ATTRIB_COLOR1][0] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.specptr->green, ctx->Current.Attrib[VERT_ATTRIB_COLOR1][1] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.specptr->blue,  ctx->Current.Attrib[VERT_ATTRIB_COLOR1][2] );
+   }
+
+
+   if (ind1 & (7 << R200_VTX_TEX0_COMP_CNT_SHIFT)) {
+      rmesa->vb.texcoordptr[0] = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 2;
+      rmesa->vb.texcoordptr[0][0] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][0];
+      rmesa->vb.texcoordptr[0][1] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][1];   
+   } 
+
+   if (ind1 & (7 << R200_VTX_TEX1_COMP_CNT_SHIFT)) {
+      rmesa->vb.texcoordptr[1] = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 2;
+      rmesa->vb.texcoordptr[1][0] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][0];
+      rmesa->vb.texcoordptr[1][1] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][1];
+   } 
+
+   /* Only vtxfmt_0 is compared here; vtxfmt_1 changes alone do not
+    * trigger a reinstall.
+    */
+   if (rmesa->vb.installed_vertex_format != rmesa->vb.vtxfmt_0) {
+      if (R200_DEBUG & DEBUG_VFMT)
+        fprintf(stderr, "reinstall on vertex_format change\n");
+      _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+      rmesa->vb.installed_vertex_format = rmesa->vb.vtxfmt_0;
+   }
+
+   if (R200_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s -- success\n", __FUNCTION__);
+   
+   return GL_TRUE;
+}
+
+
+/* Flag the vertex format as stale: clear the fell_back marker (which
+ * otherwise makes check_vtx_fmt() refuse) and request a recheck,
+ * which r200_Begin() acts on.
+ */
+void r200VtxfmtInvalidate( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+
+   rmesa->vb.fell_back = GL_FALSE;
+   rmesa->vb.recheck = GL_TRUE;
+}
+
+
+/* ctx->Driver.NewList hook (installed by r200VtxfmtValidate()):
+ * starting a display list drops out of the vtxfmt fast path.  None of
+ * the arguments are used.
+ */
+static void r200NewList( GLcontext *ctx, GLuint list, GLenum mode )
+{
+   (void) ctx;
+   (void) list;
+   (void) mode;
+
+   VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );
+}
+
+
+/* Re-run the vertex format check and install or remove the vtxfmt
+ * fast path accordingly.
+ *
+ * Pending vertices are flushed first and vb.recheck is cleared.  When
+ * check_vtx_fmt() succeeds and the path is not installed yet, the
+ * driver's exec vtxfmt plus the FlushVertices and NewList hooks are
+ * put in place.  When it fails while the path is installed, any
+ * outstanding dma is flushed, _tnl_wakeup_exec() is called and the
+ * FlushVertices hook is pointed back at r200FlushVertices.
+ */
+static void r200VtxfmtValidate( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+
+   if (R200_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (ctx->Driver.NeedFlush)
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   rmesa->vb.recheck = GL_FALSE;
+
+   if (check_vtx_fmt( ctx )) {
+      if (!rmesa->vb.installed) {
+        if (R200_DEBUG & DEBUG_VFMT)
+           fprintf(stderr, "reinstall (new install)\n");
+
+        _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+        ctx->Driver.FlushVertices = r200VtxFmtFlushVertices;
+        ctx->Driver.NewList = r200NewList;
+        rmesa->vb.installed = GL_TRUE;
+      }
+      else if (R200_DEBUG & DEBUG_VFMT)
+        /* Newline-terminated like every other debug message here, so
+         * the next line of stderr output does not run onto this one.
+         */
+        fprintf(stderr, "%s: already installed\n", __FUNCTION__);
+   } 
+   else {
+      if (R200_DEBUG & DEBUG_VFMT)
+        fprintf(stderr, "%s: failed\n", __FUNCTION__);
+
+      if (rmesa->vb.installed) {
+        if (rmesa->dma.flush)
+           rmesa->dma.flush( rmesa );
+        _tnl_wakeup_exec( ctx );
+        ctx->Driver.FlushVertices = r200FlushVertices;
+        rmesa->vb.installed = GL_FALSE;
+      }
+   }      
+}
+
+
+
+/* Materials:
+ */
+/* glMaterialfv() entry point of the vtxfmt path.
+ *
+ * Outside Begin/End the change is applied through
+ * _mesa_noop_Materialfv() followed by r200UpdateMaterial().  Inside
+ * Begin/End the fast path is abandoned via VFMT_FALLBACK() and the
+ * call is re-issued through glMaterialfv().
+ */
+static void r200_Materialfv( GLenum face, GLenum pname, 
+                              const GLfloat *params )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+
+   if (R200_DEBUG & DEBUG_VFMT) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) {
+      _mesa_noop_Materialfv( face, pname, params );
+      r200UpdateMaterial( ctx );
+   }
+   else {
+      VFMT_FALLBACK( __FUNCTION__ );
+      glMaterialfv( face, pname, params );
+   }
+}
+
+
+/* Begin/End
+ * r200_Begin: validate state, make sure DMA space and the flush hook are set up, then open primitive 'mode'. */
+static void r200_Begin( GLenum mode )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s( %s )\n", __FUNCTION__,
+             _mesa_lookup_enum_by_nr( mode ));
+
+   if (mode > GL_POLYGON) {   /* not a primitive type */
+      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
+      return;
+   }
+
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {   /* already inside Begin/End (r200_End stores GL_POLYGON+1) */
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      return;
+   }
+   
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
+
+   if (rmesa->NewGLState)
+      r200ValidateState( ctx );
+
+   if (rmesa->vb.recheck)   /* set by r200VtxfmtInvalidate() */
+      r200VtxfmtValidate( ctx );
+
+   if (!rmesa->vb.installed) {   /* validation left this vtxfmt uninstalled: re-issue via the public glBegin */
+      glBegin( mode );
+      return;
+   }
+
+
+   if (rmesa->dma.flush && rmesa->vb.counter < 12) {   /* fewer than 12 vertices of room left */
+      if (R200_DEBUG & DEBUG_VFMT)
+        fprintf(stderr, "%s: flush almost-empty buffers\n", __FUNCTION__);
+      flush_prims( rmesa );
+   }
+
+   /* Need to arrange to save vertices here?  Or always copy from dma (yuk)?
+    */
+   if (!rmesa->dma.flush) {   /* no flush hook registered: (re)initialise the write pointer and counter */
+      if (rmesa->dma.current.ptr + 12*rmesa->vb.vertex_size*4 > 
+         rmesa->dma.current.end) {   /* presumably vertex_size is in 32-bit words, hence *4 -- confirm */
+        R200_NEWPRIM( rmesa );
+        r200RefillCurrentDmaRegion( rmesa );
+      }
+
+      rmesa->vb.dmaptr = (int *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
+      rmesa->vb.counter = (rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+        (rmesa->vb.vertex_size * 4);
+      rmesa->vb.counter--;   /* one vertex fewer than fits; reason not visible here -- TODO confirm */
+      rmesa->vb.initial_counter = rmesa->vb.counter;
+      rmesa->vb.notify = wrap_buffer;   /* called by the vertex functions when counter reaches 0 */
+      rmesa->dma.flush = flush_prims;
+      ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   }
+   
+   
+   rmesa->vb.prim[0] = mode;   /* marks "inside Begin/End" for the checks above and in r200_End */
+   start_prim( rmesa, mode | PRIM_BEGIN );
+}
+
+
+
+static void r200_End( void )
+{  /* Close the primitive opened by r200_Begin. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) {   /* no Begin in progress */
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
+      return;
+   }
+         
+   note_last_prim( rmesa, PRIM_END );
+   rmesa->vb.prim[0] = GL_POLYGON+1;   /* back to the "outside Begin/End" marker value */
+}
+
+
+/* Fallback on difficult entrypoints:
+ * TAG() yields the r200_fallback_<x> names used in r200VtxfmtInit; presumably vtxfmt_tmp.h emits them, running PRE_LOOPBACK first -- confirm. */
+#define PRE_LOOPBACK( FUNC )                   \
+do {                                           \
+   if (R200_DEBUG & DEBUG_VFMT)                \
+      fprintf(stderr, "%s\n", __FUNCTION__);   \
+   VFMT_FALLBACK( __FUNCTION__ );              \
+} while (0)
+#define TAG(x) r200_fallback_##x
+#include "vtxfmt_tmp.h"
+
+
+
+static GLboolean r200NotifyBegin( GLcontext *ctx, GLenum p )
+{  /* TNL Driver.NotifyBegin hook (set in r200VtxfmtInit): returns GL_TRUE if r200_Begin took over primitive p. */
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   
+   if (R200_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(!rmesa->vb.installed);   /* only expected while the driver vtxfmt is not installed */
+
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
+
+   if (rmesa->NewGLState)
+      r200ValidateState( ctx );
+
+   if (ctx->Driver.NeedFlush)
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   if (rmesa->vb.recheck)   /* may install the vtxfmt as a side effect */
+      r200VtxfmtValidate( ctx );
+
+   if (!rmesa->vb.installed) {   /* still not installed: report failure to the caller */
+      if (R200_DEBUG & DEBUG_VFMT)
+        fprintf(stderr, "%s -- failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   r200_Begin( p );
+   return GL_TRUE;
+}
+
+static void r200VtxFmtFlushVertices( GLcontext *ctx, GLuint flags )
+{  /* Driver.FlushVertices while the vtxfmt is installed; handles each FLUSH_* bit in flags in turn. */
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+
+   if (R200_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(rmesa->vb.installed);
+
+   if (flags & FLUSH_UPDATE_CURRENT) {
+      r200_copy_to_current( ctx );
+      if (R200_DEBUG & DEBUG_VFMT)
+        fprintf(stderr, "reinstall on update_current\n");
+      _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+      ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;   /* this request has been serviced */
+   }
+
+   if (flags & FLUSH_STORED_VERTICES) {
+      assert (rmesa->dma.flush == 0 ||
+             rmesa->dma.flush == flush_prims);   /* no other flush hook is expected on this path */
+      if (rmesa->dma.flush == flush_prims)
+        flush_prims( rmesa );
+      ctx->Driver.NeedFlush &= ~FLUSH_STORED_VERTICES;
+   }
+}
+
+
+
+/* At this point, don't expect very many versions of each function to
+ * be generated, so not concerned about freeing them?
+ */
+
+
+void r200VtxfmtInit( GLcontext *ctx )
+{  /* Build rmesa->vb.vtxfmt, register the NotifyBegin hook, and set up the per-entrypoint code caches. */
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   GLvertexformat *vfmt = &(rmesa->vb.vtxfmt);
+
+   MEMSET( vfmt, 0, sizeof(GLvertexformat) );   /* start from a cleared table */
+
+   /* Hook in chooser functions for codegen, etc:
+    */
+   r200VtxfmtInitChoosers( vfmt );
+
+   /* Handled fully in supported states, but no codegen:
+    */
+   vfmt->Materialfv = r200_Materialfv;
+   vfmt->ArrayElement = _ae_loopback_array_elt;                /* generic helper */
+   vfmt->Rectf = _mesa_noop_Rectf;                     /* generic helper */
+   vfmt->Begin = r200_Begin;
+   vfmt->End = r200_End;
+
+   /* Fallback for performance reasons:  (Fix with cva/elt path here and
+    * dmatmp2.h style primitive-merging)
+    *
+    * These should call NotifyBegin(), as should _tnl_EvalMesh, to allow
+    * a driver-hook.
+    */
+   vfmt->DrawArrays = r200_fallback_DrawArrays;
+   vfmt->DrawElements = r200_fallback_DrawElements;
+   vfmt->DrawRangeElements = r200_fallback_DrawRangeElements; 
+
+
+   /* Not active in supported states; just keep ctx->Current uptodate:
+    */
+   vfmt->FogCoordfvEXT = _mesa_noop_FogCoordfvEXT;
+   vfmt->FogCoordfEXT = _mesa_noop_FogCoordfEXT;
+   vfmt->EdgeFlag = _mesa_noop_EdgeFlag;
+   vfmt->EdgeFlagv = _mesa_noop_EdgeFlagv;
+   vfmt->Indexi = _mesa_noop_Indexi;
+   vfmt->Indexiv = _mesa_noop_Indexiv;
+
+
+   /* Active but unsupported -- fallback if we receive these:
+    */
+   vfmt->CallList = r200_fallback_CallList;
+   vfmt->EvalCoord1f = r200_fallback_EvalCoord1f;
+   vfmt->EvalCoord1fv = r200_fallback_EvalCoord1fv;
+   vfmt->EvalCoord2f = r200_fallback_EvalCoord2f;
+   vfmt->EvalCoord2fv = r200_fallback_EvalCoord2fv;
+   vfmt->EvalMesh1 = r200_fallback_EvalMesh1;
+   vfmt->EvalMesh2 = r200_fallback_EvalMesh2;
+   vfmt->EvalPoint1 = r200_fallback_EvalPoint1;
+   vfmt->EvalPoint2 = r200_fallback_EvalPoint2;
+   vfmt->TexCoord3f = r200_fallback_TexCoord3f;
+   vfmt->TexCoord3fv = r200_fallback_TexCoord3fv;
+   vfmt->TexCoord4f = r200_fallback_TexCoord4f;
+   vfmt->TexCoord4fv = r200_fallback_TexCoord4fv;
+   vfmt->MultiTexCoord3fARB = r200_fallback_MultiTexCoord3fARB;
+   vfmt->MultiTexCoord3fvARB = r200_fallback_MultiTexCoord3fvARB;
+   vfmt->MultiTexCoord4fARB = r200_fallback_MultiTexCoord4fARB;
+   vfmt->MultiTexCoord4fvARB = r200_fallback_MultiTexCoord4fvARB;
+   vfmt->Vertex4f = r200_fallback_Vertex4f;
+   vfmt->Vertex4fv = r200_fallback_Vertex4fv;
+
+   (void)r200_fallback_vtxfmt;   /* bare reference; presumably silences an unused-symbol warning -- confirm */
+
+   TNL_CONTEXT(ctx)->Driver.NotifyBegin = r200NotifyBegin;
+
+   rmesa->vb.enabled = 1;
+   rmesa->vb.prim = &ctx->Driver.CurrentExecPrimitive;   /* vb.prim[0] is therefore Mesa's CurrentExecPrimitive */
+   rmesa->vb.primflags = 0;
+
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex2f );   /* one list per entrypoint; r200VtxfmtDestroy frees each */
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex2fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4ub );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4ubv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3ub );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3ubv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4f );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.Normal3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Normal3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord2f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord2fv );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord1f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord1fv );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+
+   r200InitCodegen( &rmesa->vb.codegen );
+}
+
+static void free_funcs( struct dynfn *l )
+{  /* Unlink every entry of list l, releasing its code buffer and then the entry itself. */
+   struct dynfn *fn, *next;
+   foreach_s (fn, next, l) {   /* 'next' is the macro's scratch pointer; presumably keeps the walk valid after FREE */
+      remove_from_list( fn );
+      ALIGN_FREE( fn->code );
+      FREE( fn );
+   }
+}
+
+void r200VtxfmtUnbindContext( GLcontext *ctx )
+{   /* currently a no-op: ctx is unused */
+}
+
+
+void r200VtxfmtMakeCurrent( GLcontext *ctx )
+{   /* currently a no-op: ctx is unused */
+}
+
+
+void r200VtxfmtDestroy( GLcontext *ctx )
+{  /* Release every cached dynamic function; one free_funcs() call per list that r200VtxfmtInit created. */
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+
+   count_funcs( rmesa );   /* defined outside this excerpt; presumably statistics/debug only -- confirm */
+   free_funcs( &rmesa->vb.dfn_cache.Vertex2f );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex2fv );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex3f );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex3fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color4ub );
+   free_funcs( &rmesa->vb.dfn_cache.Color4ubv );
+   free_funcs( &rmesa->vb.dfn_cache.Color3ub );
+   free_funcs( &rmesa->vb.dfn_cache.Color3ubv );
+   free_funcs( &rmesa->vb.dfn_cache.Color4f );
+   free_funcs( &rmesa->vb.dfn_cache.Color4fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color3f );
+   free_funcs( &rmesa->vb.dfn_cache.Color3fv );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   free_funcs( &rmesa->vb.dfn_cache.Normal3f );
+   free_funcs( &rmesa->vb.dfn_cache.Normal3fv );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord2f );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord2fv );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord1f );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord1fv );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+}
+
diff --git a/src/mesa/drivers/dri/r200/r200_vtxfmt.h b/src/mesa/drivers/dri/r200/r200_vtxfmt.h
new file mode 100644 (file)
index 0000000..fde3eca
--- /dev/null
@@ -0,0 +1,127 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_VTXFMT_H__
+#define __R200_VTXFMT_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "r200_context.h"
+
+
+
+extern void r200VtxfmtUpdate( GLcontext *ctx );
+extern void r200VtxfmtInit( GLcontext *ctx );
+extern void r200VtxfmtInvalidate( GLcontext *ctx );
+extern void r200VtxfmtDestroy( GLcontext *ctx );
+extern void r200VtxfmtInitChoosers( GLvertexformat *vfmt );
+
+extern void r200VtxfmtMakeCurrent( GLcontext *ctx );
+extern void r200VtxfmtUnbindContext( GLcontext *ctx );
+
+extern void r200_copy_to_current( GLcontext *ctx );
+
+#define DFN( FUNC, CACHE)  /* uses 'dfn' and 'key' from the invoking scope */ \
+do {   /* copy the code between FUNC and FUNC_end into a fresh buffer owned by dfn */ \
+   char *start = (char *)&FUNC;                                \
+   char *end = (char *)&FUNC##_end;                    \
+   insert_at_head( &CACHE, dfn );                      \
+   dfn->key[0] = key[0];                                       \
+   dfn->key[1] = key[1];                                       \
+   dfn->code = ALIGN_MALLOC( end - start, 16 );  /* NOTE(review): result is not checked before memcpy */ \
+   memcpy (dfn->code, start, end - start);             \
+}                                                      \
+while ( 0 )
+
+#define FIXUP( CODE, OFFSET, CHECKVAL, NEWVAL )        \
+do {   /* overwrite the int at byte offset OFFSET of CODE, after checking it still holds CHECKVAL */ \
+   int *icode = (int *)((CODE)+(OFFSET));   /* arguments parenthesised so compound expressions bind correctly */ \
+   assert (*icode == (CHECKVAL));              \
+   *icode = (int)NEWVAL;   /* cast form left unchanged so existing call sites keep their arithmetic */ \
+} while (0)
+
+
+/* Useful for figuring out the offsets:
+ * scans forward from OFFSET (an int lvalue) until the int at CODE+OFFSET equals CHECKVAL, stores NEWVAL there, then adds 4 to OFFSET. */
+#define FIXUP2( CODE, OFFSET, CHECKVAL, NEWVAL )               \
+do {                                                           \
+   while (*(int *)((CODE)+(OFFSET)) != (CHECKVAL)) (OFFSET)++; /* NOTE(review): scan has no upper bound */ \
+   /*fprintf(stderr, "%s/%d CVAL %x OFFSET %d VAL %x\n", __FUNCTION__,*/ \
+   /*     __LINE__, CHECKVAL, OFFSET, (int)(NEWVAL));*/                \
+   *(int *)((CODE)+(OFFSET)) = (int)(NEWVAL);                          \
+   (OFFSET) += 4;                                                      \
+} while (0)
+
+/* Codegen setup: r200VtxfmtInit() passes &rmesa->vb.codegen to r200InitCodegen().
+ */
+void r200InitCodegen( struct dfn_generators *gen );
+void r200InitX86Codegen( struct dfn_generators *gen );
+void r200InitSSECodegen( struct dfn_generators *gen );
+
+
+
+/* Defined in r200_vtxfmt_x86.c
+ */
+struct dynfn *r200_makeX86Vertex2f( GLcontext *, const int * );
+struct dynfn *r200_makeX86Vertex2fv( GLcontext *, const int * );
+struct dynfn *r200_makeX86Vertex3f( GLcontext *, const int * );
+struct dynfn *r200_makeX86Vertex3fv( GLcontext *, const int * );
+struct dynfn *r200_makeX86Color4ub( GLcontext *, const int * );
+struct dynfn *r200_makeX86Color4ubv( GLcontext *, const int * );
+struct dynfn *r200_makeX86Color3ub( GLcontext *, const int * );
+struct dynfn *r200_makeX86Color3ubv( GLcontext *, const int * );
+struct dynfn *r200_makeX86Color4f( GLcontext *, const int * );
+struct dynfn *r200_makeX86Color4fv( GLcontext *, const int * );
+struct dynfn *r200_makeX86Color3f( GLcontext *, const int * );
+struct dynfn *r200_makeX86Color3fv( GLcontext *, const int * );
+struct dynfn *r200_makeX86SecondaryColor3ubEXT( GLcontext *, const int * );
+struct dynfn *r200_makeX86SecondaryColor3ubvEXT( GLcontext *, const int * );
+struct dynfn *r200_makeX86SecondaryColor3fEXT( GLcontext *, const int * );
+struct dynfn *r200_makeX86SecondaryColor3fvEXT( GLcontext *, const int * );
+struct dynfn *r200_makeX86Normal3f( GLcontext *, const int * );
+struct dynfn *r200_makeX86Normal3fv( GLcontext *, const int * );
+struct dynfn *r200_makeX86TexCoord2f( GLcontext *, const int * );
+struct dynfn *r200_makeX86TexCoord2fv( GLcontext *, const int * );
+struct dynfn *r200_makeX86TexCoord1f( GLcontext *, const int * );
+struct dynfn *r200_makeX86TexCoord1fv( GLcontext *, const int * );
+struct dynfn *r200_makeX86MultiTexCoord2fARB( GLcontext *, const int * );
+struct dynfn *r200_makeX86MultiTexCoord2fvARB( GLcontext *, const int * );
+struct dynfn *r200_makeX86MultiTexCoord1fARB( GLcontext *, const int * );
+struct dynfn *r200_makeX86MultiTexCoord1fvARB( GLcontext *, const int * );
+
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_vtxfmt_c.c b/src/mesa/drivers/dri/r200/r200_vtxfmt_c.c
new file mode 100644 (file)
index 0000000..b7f3fec
--- /dev/null
@@ -0,0 +1,901 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "simple_list.h"
+#include "api_noop.h"
+#include "vtxfmt.h"
+
+#include "r200_vtxfmt.h"
+
+/* Fallback versions of all the entrypoints for situations where
+ * codegen isn't available.  This is still a lot faster than the
+ * vb/pipeline implementation in Mesa.
+ */
+static void r200_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
+{  /* Emit one vertex through vb.dmaptr: x, y, z first, then words 3..vertex_size-1 of vb.vertex[]. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   int i;
+
+   *rmesa->vb.dmaptr++ = *(int *)&x;   /* float bit pattern stored as an int word, no conversion */
+   *rmesa->vb.dmaptr++ = *(int *)&y;
+   *rmesa->vb.dmaptr++ = *(int *)&z;
+
+   for (i = 3; i < rmesa->vb.vertex_size; i++)
+      *rmesa->vb.dmaptr++ = rmesa->vb.vertex[i].i;
+   
+   if (--rmesa->vb.counter == 0)   /* counter hit zero: call the notify hook (r200_Begin sets it to wrap_buffer) */
+      rmesa->vb.notify();
+}
+
+
+static void r200_Vertex3fv( const GLfloat *v )
+{  /* Emit one vertex through vb.dmaptr: v[0..2] first, then words 3..vertex_size-1 of vb.vertex[]. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   int i;
+
+   *rmesa->vb.dmaptr++ = *(int *)&v[0];   /* float bit pattern stored as an int word, no conversion */
+   *rmesa->vb.dmaptr++ = *(int *)&v[1];
+   *rmesa->vb.dmaptr++ = *(int *)&v[2];
+
+   for (i = 3; i < rmesa->vb.vertex_size; i++)
+      *rmesa->vb.dmaptr++ = rmesa->vb.vertex[i].i;
+   
+   if (--rmesa->vb.counter == 0)   /* counter hit zero: call the notify hook */
+      rmesa->vb.notify();
+}
+
+
+static void r200_Vertex2f( GLfloat x, GLfloat y )
+{  /* Emit one vertex through vb.dmaptr: x, y and a zero third word, then words 3..vertex_size-1 of vb.vertex[]. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   int i;
+   
+   *rmesa->vb.dmaptr++ = *(int *)&x;   /* float bit pattern stored as an int word, no conversion */
+   *rmesa->vb.dmaptr++ = *(int *)&y;
+   *rmesa->vb.dmaptr++ = 0;            /* third word written as integer 0 */
+
+   for (i = 3; i < rmesa->vb.vertex_size; i++)
+      *rmesa->vb.dmaptr++ = rmesa->vb.vertex[i].i;
+
+   if (--rmesa->vb.counter == 0)   /* counter hit zero: call the notify hook */
+      rmesa->vb.notify();
+}
+
+
+static void r200_Vertex2fv( const GLfloat *v )
+{  /* Emit one vertex through vb.dmaptr: v[0], v[1] and a zero third word, then words 3..vertex_size-1 of vb.vertex[]. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   int i;
+
+   *rmesa->vb.dmaptr++ = *(int *)&v[0];   /* float bit pattern stored as an int word, no conversion */
+   *rmesa->vb.dmaptr++ = *(int *)&v[1];
+   *rmesa->vb.dmaptr++ = 0;               /* third word written as integer 0 */
+
+   for (i = 3; i < rmesa->vb.vertex_size; i++)
+      *rmesa->vb.dmaptr++ = rmesa->vb.vertex[i].i;
+   
+   if (--rmesa->vb.counter == 0)   /* counter hit zero: call the notify hook */
+      rmesa->vb.notify();
+}
+
+
+
+/* Color for ubyte (packed) color formats:
+ */
+static void r200_Color3ub_ub( GLubyte r, GLubyte g, GLubyte b )
+{
+   /* Packed-ubyte colour slot: r,g,b stored as given, alpha forced to 0xff. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const rgba = (R200_CONTEXT(ctx))->vb.colorptr;
+   rgba->alpha = 0xff;
+   rgba->blue  = b;
+   rgba->green = g;
+   rgba->red   = r;
+}
+
+static void r200_Color3ubv_ub( const GLubyte *v )
+{
+   /* Packed-ubyte colour slot: v[0..2] stored as given, alpha forced to 0xff. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const rgba = (R200_CONTEXT(ctx))->vb.colorptr;
+   rgba->red   = v[0];
+   rgba->green = v[1];
+   rgba->blue  = v[2];
+   rgba->alpha = 0xff;
+}
+
+static void r200_Color4ub_ub( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   /* Packed-ubyte colour slot: all four components stored unchanged. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const rgba = (R200_CONTEXT(ctx))->vb.colorptr;
+   rgba->alpha = a;
+   rgba->blue  = b;
+   rgba->green = g;
+   rgba->red   = r;
+}
+
+static void r200_Color4ubv_ub( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLuint *const packed = (GLuint *)(R200_CONTEXT(ctx))->vb.colorptr;
+   *packed = LE32_TO_CPU(*(GLuint *)v);   /* the four bytes are copied as one 32-bit word via LE32_TO_CPU */
+}
+
+
+static void r200_Color3f_ub( GLfloat r, GLfloat g, GLfloat b )
+{
+   /* Float r,g,b converted with UNCLAMPED_FLOAT_TO_UBYTE into the packed slot; alpha is 255. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const rgba = (R200_CONTEXT(ctx))->vb.colorptr;
+   rgba->alpha = 255;
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->blue,  b );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->red,   r );
+}
+
+static void r200_Color3fv_ub( const GLfloat *v )
+{
+   /* Float v[0..2] converted with UNCLAMPED_FLOAT_TO_UBYTE into the packed slot; alpha is 255. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const rgba = (R200_CONTEXT(ctx))->vb.colorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->blue,  v[2] );
+   rgba->alpha = 255;
+}
+
+static void r200_Color4f_ub( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   /* All four float components converted with UNCLAMPED_FLOAT_TO_UBYTE into the packed slot. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const rgba = (R200_CONTEXT(ctx))->vb.colorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->alpha, a );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->blue,  b );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->red,   r );
+}
+
+static void r200_Color4fv_ub( const GLfloat *v )
+{
+   /* v[0..3] converted with UNCLAMPED_FLOAT_TO_UBYTE into the packed slot. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const rgba = (R200_CONTEXT(ctx))->vb.colorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->blue,  v[2] );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->alpha, v[3] );
+}
+
+
+/* Color for float color+alpha formats:
+ */
+static void r200_Color3ub_4f( GLubyte r, GLubyte g, GLubyte b )
+{
+   /* Four-float colour slot: ubyte inputs pass through UBYTE_TO_FLOAT, alpha is 1.0. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgba = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgba[3] = 1.0;
+   rgba[2] = UBYTE_TO_FLOAT(b);
+   rgba[1] = UBYTE_TO_FLOAT(g);
+   rgba[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void r200_Color3ubv_4f( const GLubyte *v )
+{
+   /* Four-float colour slot: v[0..2] pass through UBYTE_TO_FLOAT, alpha is 1.0. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgba = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgba[0] = UBYTE_TO_FLOAT(v[0]);
+   rgba[1] = UBYTE_TO_FLOAT(v[1]);
+   rgba[2] = UBYTE_TO_FLOAT(v[2]);
+   rgba[3] = 1.0;
+}
+
+static void r200_Color4ub_4f( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   /* Four-float colour slot: every ubyte component passes through UBYTE_TO_FLOAT. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgba = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgba[3] = UBYTE_TO_FLOAT(a);
+   rgba[2] = UBYTE_TO_FLOAT(b);
+   rgba[1] = UBYTE_TO_FLOAT(g);
+   rgba[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void r200_Color4ubv_4f( const GLubyte *v )
+{
+   /* Four-float colour slot: v[0..3] each pass through UBYTE_TO_FLOAT. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgba = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgba[0] = UBYTE_TO_FLOAT(v[0]);
+   rgba[1] = UBYTE_TO_FLOAT(v[1]);
+   rgba[2] = UBYTE_TO_FLOAT(v[2]);
+   rgba[3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+
+static void r200_Color3f_4f( GLfloat r, GLfloat g, GLfloat b )
+{
+   /* Four-float colour slot: r,g,b stored directly, alpha is 1.0. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgba = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgba[3] = 1.0;
+   rgba[2] = b;
+   rgba[1] = g;
+   rgba[0] = r;
+}
+
+static void r200_Color3fv_4f( const GLfloat *v )
+{
+   /* Four-float colour slot: v[0..2] stored directly, alpha is 1.0. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgba = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgba[0] = v[0];
+   rgba[1] = v[1];
+   rgba[2] = v[2];
+   rgba[3] = 1.0;
+}
+
+static void r200_Color4f_4f( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   /* Four-float colour slot: all four components stored directly. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgba = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgba[3] = a;
+   rgba[2] = b;
+   rgba[1] = g;
+   rgba[0] = r;
+}
+
+static void r200_Color4fv_4f( const GLfloat *v )
+{
+   /* Four-float colour slot: v[0..3] stored directly. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgba = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgba[0] = v[0];
+   rgba[1] = v[1];
+   rgba[2] = v[2];
+   rgba[3] = v[3];
+}
+
+
+/* Color for float color formats:
+ */
+static void r200_Color3ub_3f( GLubyte r, GLubyte g, GLubyte b )
+{
+   /* Three-float colour slot: only r,g,b are written, via UBYTE_TO_FLOAT. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgb = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgb[2] = UBYTE_TO_FLOAT(b);
+   rgb[1] = UBYTE_TO_FLOAT(g);
+   rgb[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void r200_Color3ubv_3f( const GLubyte *v )
+{
+   /* Three-float colour slot: only v[0..2] are written, via UBYTE_TO_FLOAT. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgb = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgb[0] = UBYTE_TO_FLOAT(v[0]);
+   rgb[1] = UBYTE_TO_FLOAT(v[1]);
+   rgb[2] = UBYTE_TO_FLOAT(v[2]);
+}
+
+static void r200_Color4ub_3f( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   /* r,g,b go to the three-float slot; alpha is recorded in ctx->Current instead. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgb = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgb[2] = UBYTE_TO_FLOAT(b);
+   rgb[1] = UBYTE_TO_FLOAT(g);
+   rgb[0] = UBYTE_TO_FLOAT(r);
+   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = UBYTE_TO_FLOAT(a);
+}
+
+static void r200_Color4ubv_3f( const GLubyte *v )
+{
+   /* v[0..2] go to the three-float slot; v[3] is recorded in ctx->Current instead. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgb = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgb[0] = UBYTE_TO_FLOAT(v[0]);
+   rgb[1] = UBYTE_TO_FLOAT(v[1]);
+   rgb[2] = UBYTE_TO_FLOAT(v[2]);
+   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+
+static void r200_Color3f_3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   /* Three-float colour slot: r,g,b stored directly. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgb = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgb[2] = b;
+   rgb[1] = g;
+   rgb[0] = r;
+}
+
+static void r200_Color3fv_3f( const GLfloat *v )
+{
+   /* Three-float colour slot: v[0..2] stored directly. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgb = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgb[0] = v[0];
+   rgb[1] = v[1];
+   rgb[2] = v[2];
+}
+
+static void r200_Color4f_3f( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   /* r,g,b go to the three-float slot; alpha is recorded in ctx->Current instead. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgb = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgb[2] = b;
+   rgb[1] = g;
+   rgb[0] = r;
+   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = a;
+}
+
+static void r200_Color4fv_3f( const GLfloat *v )
+{
+   /* v[0..2] go to the three-float slot; v[3] is recorded in ctx->Current instead. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const rgb = (R200_CONTEXT(ctx))->vb.floatcolorptr;
+   rgb[0] = v[0];
+   rgb[1] = v[1];
+   rgb[2] = v[2];
+   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = v[3];
+}
+
+
+/* Secondary Color:
+ */
+static void r200_SecondaryColor3ubEXT_ub( GLubyte r, GLubyte g, GLubyte b )
+{
+   /* Packed-ubyte secondary colour via vb.specptr: r,g,b as given, alpha forced to 0xff. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const spec = (R200_CONTEXT(ctx))->vb.specptr;
+   spec->alpha = 0xff;
+   spec->blue  = b;
+   spec->green = g;
+   spec->red   = r;
+}
+
+static void r200_SecondaryColor3ubvEXT_ub( const GLubyte *v )
+{
+   /* Packed-ubyte secondary colour via vb.specptr: v[0..2] as given, alpha forced to 0xff. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const spec = (R200_CONTEXT(ctx))->vb.specptr;
+   spec->red   = v[0];
+   spec->green = v[1];
+   spec->blue  = v[2];
+   spec->alpha = 0xff;
+}
+
+static void r200_SecondaryColor3fEXT_ub( GLfloat r, GLfloat g, GLfloat b )
+{
+   /* Float r,g,b converted with UNCLAMPED_FLOAT_TO_UBYTE into the packed secondary colour; alpha is 255. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const spec = (R200_CONTEXT(ctx))->vb.specptr;
+   spec->alpha = 255;
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->blue,  b );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->red,   r );
+}
+
+static void r200_SecondaryColor3fvEXT_ub( const GLfloat *v )
+{
+   /* Float v[0..2] converted with UNCLAMPED_FLOAT_TO_UBYTE into the packed secondary colour; alpha is 255. */
+   GET_CURRENT_CONTEXT(ctx);
+   r200_color_t *const spec = (R200_CONTEXT(ctx))->vb.specptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->blue,  v[2] );
+   spec->alpha = 255;
+}
+
+static void r200_SecondaryColor3ubEXT_3f( GLubyte r, GLubyte g, GLubyte b )
+{
+   /* Float secondary colour via vb.floatspecptr; note a fourth element is also written (1.0). */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const spec = (R200_CONTEXT(ctx))->vb.floatspecptr;
+   spec[3] = 1.0;
+   spec[2] = UBYTE_TO_FLOAT(b);
+   spec[1] = UBYTE_TO_FLOAT(g);
+   spec[0] = UBYTE_TO_FLOAT(r);
+}
+
+static void r200_SecondaryColor3ubvEXT_3f( const GLubyte *v )
+{
+   /* Float secondary colour via vb.floatspecptr; note a fourth element is also written (1.0). */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const spec = (R200_CONTEXT(ctx))->vb.floatspecptr;
+   spec[0] = UBYTE_TO_FLOAT(v[0]);
+   spec[1] = UBYTE_TO_FLOAT(v[1]);
+   spec[2] = UBYTE_TO_FLOAT(v[2]);
+   spec[3] = 1.0;
+}
+
+static void r200_SecondaryColor3fEXT_3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   /* Float secondary colour via vb.floatspecptr; note a fourth element is also written (1.0). */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const spec = (R200_CONTEXT(ctx))->vb.floatspecptr;
+   spec[3] = 1.0;
+   spec[2] = b;
+   spec[1] = g;
+   spec[0] = r;
+}
+
+static void r200_SecondaryColor3fvEXT_3f( const GLfloat *v )
+{
+   /* Float secondary colour via vb.floatspecptr; note a fourth element is also written (1.0). */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const spec = (R200_CONTEXT(ctx))->vb.floatspecptr;
+   spec[0] = v[0];
+   spec[1] = v[1];
+   spec[2] = v[2];
+   spec[3] = 1.0;
+}
+
+
+
+/* Normal
+ */
+static void r200_Normal3f( GLfloat n0, GLfloat n1, GLfloat n2 )
+{
+   /* Store the three normal components through vb.normalptr. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const normal = (R200_CONTEXT(ctx))->vb.normalptr;
+   normal[2] = n2;
+   normal[1] = n1;
+   normal[0] = n0;
+}
+
+static void r200_Normal3fv( const GLfloat *v )
+{
+   /* Store v[0..2] through vb.normalptr. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const normal = (R200_CONTEXT(ctx))->vb.normalptr;
+   normal[0] = v[0];
+   normal[1] = v[1];
+   normal[2] = v[2];
+}
+
+
+/* TexCoord
+ */
+static void r200_TexCoord1f( GLfloat s )
+{
+   /* Unit-0 texcoord slot: s as given, second component zeroed. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const st = (R200_CONTEXT(ctx))->vb.texcoordptr[0];
+   st[1] = 0;
+   st[0] = s;
+}
+
+static void r200_TexCoord1fv( const GLfloat *v )
+{
+   /* Unit-0 texcoord slot: v[0] as given, second component zeroed. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const st = (R200_CONTEXT(ctx))->vb.texcoordptr[0];
+   st[0] = v[0];
+   st[1] = 0;
+}
+
+static void r200_TexCoord2f( GLfloat s, GLfloat t )
+{
+   /* Unit-0 texcoord slot: both components stored as given. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const st = (R200_CONTEXT(ctx))->vb.texcoordptr[0];
+   st[1] = t;
+   st[0] = s;
+}
+
+static void r200_TexCoord2fv( const GLfloat *v )
+{
+   /* Unit-0 texcoord slot: v[0], v[1] stored as given. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const st = (R200_CONTEXT(ctx))->vb.texcoordptr[0];
+   st[0] = v[0];
+   st[1] = v[1];
+}
+
+
+/* MultiTexcoord
+ * 
+ * Technically speaking, these functions should subtract GL_TEXTURE0 from
+ * \c target before masking and using it.  The value of GL_TEXTURE0 is 0x84C0,
+ * which has the low-order 5 bits 0.  For all possible valid values of
+ * \c target, subtracting GL_TEXTURE0 therefore has the net effect of masking
+ * \c target with 0x1F.  Masking with 0x1F and then masking with 0x01 is
+ * redundant, so the subtraction has been omitted.
+ */
+
+static void r200_MultiTexCoord1fARB( GLenum target, GLfloat s  )
+{
+   /* Slot chosen by the low bit of target: s as given, second component zeroed. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const st = (R200_CONTEXT(ctx))->vb.texcoordptr[target & 1];
+   st[1] = 0;
+   st[0] = s;
+}
+
+static void r200_MultiTexCoord1fvARB( GLenum target, const GLfloat *v )
+{
+   /* Slot chosen by the low bit of target: v[0] as given, second component zeroed. */
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *const st = (R200_CONTEXT(ctx))->vb.texcoordptr[target & 1];
+   st[0] = v[0];
+   st[1] = 0;
+}
+
+/* Generic MultiTexCoord2fARB: the low bit of target selects one of the
+ * two texcoord slots; store (s, t) there.
+ */
+static void r200_MultiTexCoord2fARB( GLenum target, GLfloat s, GLfloat t )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint unit = target & 1;
+   GLfloat *st = rmesa->vb.texcoordptr[unit];
+
+   st[0] = s;
+   st[1] = t;
+}
+
+/* Generic MultiTexCoord2fvARB: the low bit of target selects one of the
+ * two texcoord slots; copy v[0..1] there.
+ */
+static void r200_MultiTexCoord2fvARB( GLenum target, const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint unit = target & 1;
+   GLfloat *st = rmesa->vb.texcoordptr[unit];
+   GLuint i;
+
+   for (i = 0; i < 2; i++)
+      st[i] = v[i];
+}
+
+/* Walk list l with the foreach macro and return the first entry whose
+ * two-word key equals key[0] and key[1], or 0 when no entry matches.
+ */
+static struct dynfn *lookup( struct dynfn *l, const int *key )
+{
+   struct dynfn *entry;
+
+   foreach( entry, l ) {
+      if (entry->key[0] == key[0]) {
+         if (entry->key[1] == key[1])
+            return entry;
+      }
+   }
+
+   /* Nothing cached for this key. */
+   return 0;
+}
+
+/* Can't use the loopback template for this:
+ *
+ * CHOOSE(FN, FNTYPE, MASK0, MASK1, ARGS1, ARGS2) defines choose_FN, a
+ * dispatch entry with parameter list ARGS1.  When called it:
+ *   - builds a two-word key from rmesa->vb.vtxfmt_0 & MASK0 and
+ *     rmesa->vb.vtxfmt_1 & MASK1;
+ *   - looks the key up in rmesa->vb.dfn_cache.FN and, on a miss, asks
+ *     rmesa->vb.codegen.FN to build a function for it;
+ *   - installs the result in ctx->Exec->FN: dfn->code cast to FNTYPE
+ *     when a dynfn is available, otherwise the generic r200_FN;
+ *   - sets FLUSH_UPDATE_CURRENT in ctx->Driver.NeedFlush;
+ *   - forwards the call through ctx->Exec->FN with arguments ARGS2.
+ */
+
+#define CHOOSE(FN, FNTYPE, MASK0, MASK1, ARGS1, ARGS2 )                        \
+static void choose_##FN ARGS1                                          \
+{                                                                      \
+   GET_CURRENT_CONTEXT(ctx);                                           \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);                           \
+   int key[2];                                                         \
+   struct dynfn *dfn;                                                  \
+                                                                       \
+   key[0] = rmesa->vb.vtxfmt_0 & MASK0;                                        \
+   key[1] = rmesa->vb.vtxfmt_1 & MASK1;                                        \
+                                                                       \
+   dfn = lookup( &rmesa->vb.dfn_cache.FN, key );                       \
+   if (dfn == 0)                                                       \
+      dfn = rmesa->vb.codegen.FN( ctx, key );                          \
+   else if (R200_DEBUG & DEBUG_CODEGEN)                                        \
+      fprintf(stderr, "%s -- cached codegen\n", __FUNCTION__ );                \
+                                                                       \
+   if (dfn)                                                            \
+      ctx->Exec->FN = (FNTYPE)(dfn->code);                             \
+   else {                                                              \
+      if (R200_DEBUG & DEBUG_CODEGEN)                                  \
+        fprintf(stderr, "%s -- generic version\n", __FUNCTION__ );     \
+      ctx->Exec->FN = r200_##FN;                                       \
+   }                                                                   \
+                                                                       \
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;                      \
+   ctx->Exec->FN ARGS2;                                                        \
+}
+
+
+
+/* For the _3f case, only allow one color function to be hooked in at
+ * a time.  Eventually, use a similar mechanism to allow selecting the
+ * color component of the vertex format based on client behaviour.  
+ *
+ * Note:  Perform these actions even if there is a codegen or cached 
+ * codegen version of the chosen function.
+ *
+ * CHOOSE_COLOR(FN, FNTYPE, NR, MASK0, MASK1, ARGS1, ARGS2) defines
+ * choose_FN.  NR is the number of color components FN supplies (3 or 4
+ * in the instantiations in this file).  The 'c' version is picked from
+ * VTX_COLOR(rmesa->vb.vtxfmt_0,0):
+ *   R200_VTX_PK_RGBA -> r200_FN_ub
+ *   R200_VTX_FP_RGB  -> r200_FN_3f
+ *   anything else    -> r200_FN_4f
+ *
+ * In the R200_VTX_FP_RGB case, when installed_color_3f_sz differs from
+ * NR it is updated first (and alpha of the current color is set to 1.0
+ * when NR == 3).  If FLUSH_UPDATE_CURRENT is pending at that point the
+ * macro calls r200_copy_to_current(), reinstalls rmesa->vb.vtxfmt,
+ * re-dispatches the call through ctx->Exec->FN and returns early.
+ *
+ * Afterwards a cached or freshly generated dynfn, if any, replaces the
+ * 'c' version in ctx->Exec->FN, FLUSH_UPDATE_CURRENT is set and the call
+ * is forwarded with ARGS2.
+ */
+#define CHOOSE_COLOR(FN, FNTYPE, NR, MASK0, MASK1, ARGS1, ARGS2 )      \
+static void choose_##FN ARGS1                                          \
+{                                                                      \
+   GET_CURRENT_CONTEXT(ctx);                                           \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);                   \
+   int key[2];                                                         \
+   struct dynfn *dfn;                                                  \
+                                                                       \
+   key[0] = rmesa->vb.vtxfmt_0 & MASK0;                                        \
+   key[1] = rmesa->vb.vtxfmt_1 & MASK1;                                        \
+                                                                       \
+   if (VTX_COLOR(rmesa->vb.vtxfmt_0,0) == R200_VTX_PK_RGBA) {          \
+      ctx->Exec->FN = r200_##FN##_ub;                                  \
+   }                                                                   \
+   else if (VTX_COLOR(rmesa->vb.vtxfmt_0,0) == R200_VTX_FP_RGB) {      \
+                                                                       \
+      if (rmesa->vb.installed_color_3f_sz != NR) {                     \
+         rmesa->vb.installed_color_3f_sz = NR;                         \
+         if (NR == 3) ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = 1.0;        \
+         if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) {           \
+            r200_copy_to_current( ctx );                               \
+            _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );       \
+            ctx->Exec->FN ARGS2;                                       \
+            return;                                                    \
+         }                                                             \
+      }                                                                        \
+                                                                       \
+      ctx->Exec->FN = r200_##FN##_3f;                                  \
+   }                                                                   \
+   else {                                                              \
+      ctx->Exec->FN = r200_##FN##_4f;                                  \
+   }                                                                   \
+                                                                       \
+                                                                       \
+   dfn = lookup( &rmesa->vb.dfn_cache.FN, key );                       \
+   if (!dfn) dfn = rmesa->vb.codegen.FN( ctx, key );                   \
+                                                                       \
+   if (dfn) {                                                          \
+      if (R200_DEBUG & DEBUG_CODEGEN)                                  \
+         fprintf(stderr, "%s -- codegen version\n", __FUNCTION__ );    \
+      ctx->Exec->FN = (FNTYPE)dfn->code;                               \
+   }                                                                   \
+   else if (R200_DEBUG & DEBUG_CODEGEN)                                        \
+         fprintf(stderr, "%s -- 'c' version\n", __FUNCTION__ );                \
+                                                                       \
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;                      \
+   ctx->Exec->FN ARGS2;                                                        \
+}
+
+
+
+/* Right now there are both _ub and _3f versions of the secondary color
+ * functions.  Currently, we only set-up the hardware to use the _ub versions.
+ * The _3f versions are needed for the cases where secondary color isn't used
+ * in the vertex format, but it still needs to be stored in the context
+ * state vector.
+ *
+ * CHOOSE_SECONDARY_COLOR(FN, FNTYPE, MASK0, MASK1, ARGS1, ARGS2) defines
+ * choose_FN.  It follows the same cache-lookup / codegen sequence as
+ * CHOOSE; the only difference is the fallback installed when no dynfn
+ * is available: r200_FN_ub when VTX_COLOR(rmesa->vb.vtxfmt_0,1) is
+ * R200_VTX_PK_RGBA, otherwise r200_FN_3f.
+ */
+#define CHOOSE_SECONDARY_COLOR(FN, FNTYPE, MASK0, MASK1, ARGS1, ARGS2 )        \
+static void choose_##FN ARGS1                                          \
+{                                                                      \
+   GET_CURRENT_CONTEXT(ctx);                                           \
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);                   \
+   int key[2];                                                         \
+   struct dynfn *dfn;                                                  \
+                                                                       \
+   key[0] = rmesa->vb.vtxfmt_0 & MASK0;                                        \
+   key[1] = rmesa->vb.vtxfmt_1 & MASK1;                                        \
+                                                                       \
+   dfn = lookup( &rmesa->vb.dfn_cache.FN, key );                       \
+   if (dfn == 0)                                                       \
+      dfn = rmesa->vb.codegen.FN( ctx, key );                  \
+   else  if (R200_DEBUG & DEBUG_CODEGEN)                               \
+      fprintf(stderr, "%s -- cached version\n", __FUNCTION__ );                \
+                                                                       \
+   if (dfn)                                                            \
+      ctx->Exec->FN = (FNTYPE)(dfn->code);                     \
+   else {                                                              \
+      if (R200_DEBUG & DEBUG_CODEGEN)                                  \
+         fprintf(stderr, "%s -- generic version\n", __FUNCTION__ );    \
+      ctx->Exec->FN = (VTX_COLOR(rmesa->vb.vtxfmt_0,1) == R200_VTX_PK_RGBA) \
+         ? r200_##FN##_ub : r200_##FN##_3f;                            \
+   }                                                                   \
+                                                                       \
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;              \
+   ctx->Exec->FN ARGS2;                                                \
+}
+
+
+
+
+
+/* VTXFMT_0
+ *
+ * Masks passed as MASK0 to the CHOOSE* macros; they select the bits of
+ * rmesa->vb.vtxfmt_0 that form key[0] of the codegen cache key.  Each
+ * mask includes the previous one plus one more group of bits.
+ */
+#define MASK_XYZW  (R200_VTX_W0|R200_VTX_Z0)
+#define MASK_NORM  (MASK_XYZW|R200_VTX_N0)
+#define MASK_COLOR (MASK_NORM |(R200_VTX_COLOR_MASK<<R200_VTX_COLOR_0_SHIFT))
+#define MASK_SPEC  (MASK_COLOR|(R200_VTX_COLOR_MASK<<R200_VTX_COLOR_1_SHIFT))
+
+/* VTXFMT_1
+ *
+ * Passed as MASK1: a 3-bit field of rmesa->vb.vtxfmt_1 at
+ * R200_VTX_TEX0_COMP_CNT_SHIFT, used for key[1].
+ */
+#define MASK_ST0 (0x7 << R200_VTX_TEX0_COMP_CNT_SHIFT)
+
+
+
+/* Function-pointer types used as the FNTYPE argument of the CHOOSE*
+ * macros, where dfn->code is cast to the type of the dispatch slot.
+ * Naming: an 'e' after the 'p' marks a leading GLenum argument; <n>f and
+ * <n>ub take n GLfloat / GLubyte arguments; fv and ubv take a pointer.
+ */
+typedef void (*p4f)( GLfloat, GLfloat, GLfloat, GLfloat );
+typedef void (*p3f)( GLfloat, GLfloat, GLfloat );
+typedef void (*p2f)( GLfloat, GLfloat );
+typedef void (*p1f)( GLfloat );
+typedef void (*pe2f)( GLenum, GLfloat, GLfloat );
+typedef void (*pe1f)( GLenum, GLfloat );
+typedef void (*p4ub)( GLubyte, GLubyte, GLubyte, GLubyte );
+typedef void (*p3ub)( GLubyte, GLubyte, GLubyte );
+typedef void (*pfv)( const GLfloat * );
+typedef void (*pefv)( GLenum, const GLfloat * );
+typedef void (*pubv)( const GLubyte * );
+
+
+/* Instantiate one choose_<FN> per dispatch entry.  Each invocation
+ * gives the entry name, its function-pointer type, the vtxfmt_0 and
+ * vtxfmt_1 key masks, the parameter list and the forwarded arguments.
+ */
+
+/* Normals: keyed on position + normal bits of vtxfmt_0 only. */
+CHOOSE(Normal3f, p3f, MASK_NORM, 0, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Normal3fv, pfv, MASK_NORM, 0, 
+       (const GLfloat *v), (v))
+
+/* Primary color, ubyte entry points; the third argument is NR. */
+CHOOSE_COLOR(Color4ub, p4ub, 4, MASK_COLOR, 0,
+       (GLubyte a,GLubyte b, GLubyte c, GLubyte d), (a,b,c,d))
+CHOOSE_COLOR(Color4ubv, pubv, 4, MASK_COLOR, 0, 
+       (const GLubyte *v), (v))
+CHOOSE_COLOR(Color3ub, p3ub, 3, MASK_COLOR, 0, 
+       (GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE_COLOR(Color3ubv, pubv, 3, MASK_COLOR, 0, 
+       (const GLubyte *v), (v))
+
+/* Primary color, float entry points. */
+CHOOSE_COLOR(Color4f, p4f, 4, MASK_COLOR, 0, 
+       (GLfloat a,GLfloat b, GLfloat c, GLfloat d), (a,b,c,d))
+CHOOSE_COLOR(Color4fv, pfv, 4, MASK_COLOR, 0, 
+       (const GLfloat *v), (v))
+CHOOSE_COLOR(Color3f, p3f, 3, MASK_COLOR, 0,
+       (GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE_COLOR(Color3fv, pfv, 3, MASK_COLOR, 0,
+       (const GLfloat *v), (v))
+
+
+/* Secondary color. */
+CHOOSE_SECONDARY_COLOR(SecondaryColor3ubEXT, p3ub, MASK_SPEC, 0, 
+       (GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3ubvEXT, pubv, MASK_SPEC, 0, 
+       (const GLubyte *v), (v))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3fEXT, p3f, MASK_SPEC, 0,
+       (GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3fvEXT, pfv, MASK_SPEC, 0,
+       (const GLfloat *v), (v))
+
+/* Texture coordinates: all of vtxfmt_0 plus the MASK_ST0 field of
+ * vtxfmt_1.
+ */
+CHOOSE(TexCoord2f, p2f, ~0, MASK_ST0, 
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(TexCoord2fv, pfv, ~0, MASK_ST0, 
+       (const GLfloat *v), (v))
+CHOOSE(TexCoord1f, p1f, ~0, MASK_ST0, 
+       (GLfloat a), (a))
+CHOOSE(TexCoord1fv, pfv, ~0, MASK_ST0, 
+       (const GLfloat *v), (v))
+
+/* Multitexture coordinates: keyed on both vtxfmt words in full. */
+CHOOSE(MultiTexCoord2fARB, pe2f, ~0, ~0,
+        (GLenum u,GLfloat a,GLfloat b), (u,a,b))
+CHOOSE(MultiTexCoord2fvARB, pefv, ~0, ~0,
+       (GLenum u,const GLfloat *v), (u,v))
+CHOOSE(MultiTexCoord1fARB, pe1f, ~0, ~0,
+        (GLenum u,GLfloat a), (u,a))
+CHOOSE(MultiTexCoord1fvARB, pefv, ~0, ~0,
+       (GLenum u,const GLfloat *v), (u,v))
+
+/* Vertices: keyed on both vtxfmt words in full. */
+CHOOSE(Vertex3f, p3f, ~0, ~0, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Vertex3fv, pfv, ~0, ~0, 
+       (const GLfloat *v), (v))
+CHOOSE(Vertex2f, p2f, ~0, ~0, 
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(Vertex2fv, pfv, ~0, ~0, 
+       (const GLfloat *v), (v))
+
+
+
+
+
+/* Point every entry of vfmt handled by this file at its choose_<FN>
+ * function, so the first call through each entry selects the real
+ * implementation.
+ */
+void r200VtxfmtInitChoosers( GLvertexformat *vfmt )
+{
+   /* Position */
+   vfmt->Vertex2f = choose_Vertex2f;
+   vfmt->Vertex2fv = choose_Vertex2fv;
+   vfmt->Vertex3f = choose_Vertex3f;
+   vfmt->Vertex3fv = choose_Vertex3fv;
+
+   /* Normal */
+   vfmt->Normal3f = choose_Normal3f;
+   vfmt->Normal3fv = choose_Normal3fv;
+
+   /* Primary color */
+   vfmt->Color3f = choose_Color3f;
+   vfmt->Color3fv = choose_Color3fv;
+   vfmt->Color3ub = choose_Color3ub;
+   vfmt->Color3ubv = choose_Color3ubv;
+   vfmt->Color4f = choose_Color4f;
+   vfmt->Color4fv = choose_Color4fv;
+   vfmt->Color4ub = choose_Color4ub;
+   vfmt->Color4ubv = choose_Color4ubv;
+
+   /* Secondary color */
+   vfmt->SecondaryColor3fEXT = choose_SecondaryColor3fEXT;
+   vfmt->SecondaryColor3fvEXT = choose_SecondaryColor3fvEXT;
+   vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT;
+   vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT;
+
+   /* Texture coordinates */
+   vfmt->TexCoord1f = choose_TexCoord1f;
+   vfmt->TexCoord1fv = choose_TexCoord1fv;
+   vfmt->TexCoord2f = choose_TexCoord2f;
+   vfmt->TexCoord2fv = choose_TexCoord2fv;
+   vfmt->MultiTexCoord1fARB = choose_MultiTexCoord1fARB;
+   vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB;
+   vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB;
+   vfmt->MultiTexCoord2fvARB = choose_MultiTexCoord2fvARB;
+}
+
+
+/* Default generator: builds nothing and returns 0, which makes the
+ * choosers fall back to the generic C implementation.
+ */
+static struct dynfn *codegen_noop( GLcontext *ctx, const int *key )
+{
+   /* Both arguments are intentionally unused. */
+   (void) key;
+   (void) ctx;
+
+   return 0;
+}
+
+/* Fill gen with generators.  Every slot starts as codegen_noop; unless
+ * the R200_NO_CODEGEN environment variable is set, the x86 and then the
+ * SSE initialisers (when compiled in) may replace individual slots.
+ */
+void r200InitCodegen( struct dfn_generators *gen )
+{
+   /* Position */
+   gen->Vertex2f = codegen_noop;
+   gen->Vertex2fv = codegen_noop;
+   gen->Vertex3f = codegen_noop;
+   gen->Vertex3fv = codegen_noop;
+
+   /* Normal */
+   gen->Normal3f = codegen_noop;
+   gen->Normal3fv = codegen_noop;
+
+   /* Primary color */
+   gen->Color3f = codegen_noop;
+   gen->Color3fv = codegen_noop;
+   gen->Color3ub = codegen_noop;
+   gen->Color3ubv = codegen_noop;
+   gen->Color4f = codegen_noop;
+   gen->Color4fv = codegen_noop;
+   gen->Color4ub = codegen_noop;
+   gen->Color4ubv = codegen_noop;
+
+   /* Secondary color */
+   gen->SecondaryColor3fEXT = codegen_noop;
+   gen->SecondaryColor3fvEXT = codegen_noop;
+   gen->SecondaryColor3ubEXT = codegen_noop;
+   gen->SecondaryColor3ubvEXT = codegen_noop;
+
+   /* Texture coordinates */
+   gen->TexCoord1f = codegen_noop;
+   gen->TexCoord1fv = codegen_noop;
+   gen->TexCoord2f = codegen_noop;
+   gen->TexCoord2fv = codegen_noop;
+   gen->MultiTexCoord1fARB = codegen_noop;
+   gen->MultiTexCoord1fvARB = codegen_noop;
+   gen->MultiTexCoord2fARB = codegen_noop;
+   gen->MultiTexCoord2fvARB = codegen_noop;
+
+   /* Code generation disabled by the user: keep the no-op generators. */
+   if (getenv("R200_NO_CODEGEN"))
+      return;
+
+#if defined(USE_X86_ASM)
+   r200InitX86Codegen( gen );
+#endif
+
+#if defined(USE_SSE_ASM)
+   r200InitSSECodegen( gen );
+#endif
+}
diff --git a/src/mesa/drivers/dri/r200/r200_vtxfmt_sse.c b/src/mesa/drivers/dri/r200/r200_vtxfmt_sse.c
new file mode 100644 (file)
index 0000000..83eb3e1
--- /dev/null
@@ -0,0 +1,231 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "simple_list.h" 
+#include "r200_vtxfmt.h"
+
+#if defined(USE_SSE_ASM)
+#include "X86/common_x86_asm.h"
+
+/* EXTERN(FUNC) declares the two external symbols FUNC and FUNC_end.
+ * NOTE(review): these look like the start and end labels of assembly
+ * templates consumed by the DFN macro, which is defined outside this
+ * file -- confirm against r200_vtxfmt.h.
+ */
+#define EXTERN( FUNC )         \
+extern const char *FUNC;       \
+extern const char *FUNC##_end
+
+EXTERN( _sse_Attribute2fv );
+EXTERN( _sse_Attribute2f );
+EXTERN( _sse_Attribute3fv );
+EXTERN( _sse_Attribute3f );
+EXTERN( _sse_MultiTexCoord2fv );
+EXTERN( _sse_MultiTexCoord2f );
+EXTERN( _sse_MultiTexCoord2fv_2 );
+EXTERN( _sse_MultiTexCoord2f_2 );
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state.
+ */
+
+/* Build a dynfn from the _sse_Attribute2fv template.
+ *
+ * cache - list passed to DFN for the new dynfn
+ * key   - vertex-format key; key[0] appears in the debug trace
+ * name  - caller's name, used only in the debug trace
+ * dest  - address patched into the code at byte offset 10
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the choosers
+ * treat 0 as "no codegen" and use the generic C version.
+ */
+static struct dynfn *r200_makeSSEAttribute2fv( struct dynfn * cache, const int * key,
+                                              const char * name, void * dest)
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   DFN ( _sse_Attribute2fv, (*cache) );
+   /* assumes 32-bit pointers: dest is narrowed to int for the patch */
+   FIXUP(dfn->code, 10, 0x0, (int)dest);
+   return dfn;
+}
+
+/* Build a dynfn from the _sse_Attribute2f template.
+ *
+ * cache - list passed to DFN for the new dynfn
+ * key   - vertex-format key; key[0] appears in the debug trace
+ * name  - caller's name, used only in the debug trace
+ * dest  - address patched into the code at byte offset 8
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the choosers
+ * treat 0 as "no codegen" and use the generic C version.
+ */
+static struct dynfn *r200_makeSSEAttribute2f( struct dynfn * cache, const int * key,
+                                             const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   DFN ( _sse_Attribute2f, (*cache) );
+   /* assumes 32-bit pointers: dest is narrowed to int for the patch */
+   FIXUP(dfn->code, 8, 0x0, (int)dest);
+   return dfn;
+}
+
+/* Build a dynfn from the _sse_Attribute3fv template.
+ *
+ * cache - list passed to DFN for the new dynfn
+ * key   - vertex-format key; key[0] appears in the debug trace
+ * name  - caller's name, used only in the debug trace
+ * dest  - destination address; dest and dest+8 bytes are patched into
+ *         the code at byte offsets 13 and 18
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the choosers
+ * treat 0 as "no codegen" and use the generic C version.
+ */
+static struct dynfn *r200_makeSSEAttribute3fv( struct dynfn * cache, const int * key,
+                                              const char * name, void * dest)
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   DFN ( _sse_Attribute3fv, (*cache) );
+   /* assumes 32-bit pointers: dest is narrowed to int for the patches */
+   FIXUP(dfn->code, 13, 0x0, (int)dest);
+   FIXUP(dfn->code, 18, 0x8, 8+(int)dest);
+   return dfn;
+}
+
+/* Build a dynfn from the _sse_Attribute3f template.
+ *
+ * cache - list passed to DFN for the new dynfn
+ * key   - vertex-format key; key[0] appears in the debug trace
+ * name  - caller's name, used only in the debug trace
+ * dest  - destination address; dest and dest+8 bytes are patched into
+ *         the code at byte offsets 12 and 17
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the choosers
+ * treat 0 as "no codegen" and use the generic C version.
+ */
+static struct dynfn *r200_makeSSEAttribute3f( struct dynfn * cache, const int * key,
+                                             const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   DFN ( _sse_Attribute3f, (*cache) );
+   /* assumes 32-bit pointers: dest is narrowed to int for the patches */
+   FIXUP(dfn->code, 12, 0x0, (int)dest);
+   FIXUP(dfn->code, 17, 0x8, 8+(int)dest);
+   return dfn;
+}
+
+/* SSE generator for Normal3fv: a three-float pointer setter targeting
+ * rmesa->vb.normalptr, cached in dfn_cache.Normal3fv.
+ */
+static struct dynfn *r200_makeSSENormal3fv( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.Normal3fv;
+   void *target = rmesa->vb.normalptr;
+
+   return r200_makeSSEAttribute3fv( cache, key, __FUNCTION__, target );
+}
+
+/* SSE generator for Normal3f: a three-float setter targeting
+ * rmesa->vb.normalptr, cached in dfn_cache.Normal3f.
+ */
+static struct dynfn *r200_makeSSENormal3f( GLcontext *ctx, const int * key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.Normal3f;
+   void *target = rmesa->vb.normalptr;
+
+   return r200_makeSSEAttribute3f( cache, key, __FUNCTION__, target );
+}
+
+/* SSE generator for Color3fv.  Only produces code when color 0 of the
+ * keyed vertex format is R200_VTX_FP_RGB; returns 0 otherwise.
+ */
+static struct dynfn *r200_makeSSEColor3fv( GLcontext *ctx, const int * key )
+{
+   if (VTX_COLOR(key[0],0) == R200_VTX_FP_RGB) {
+      r200ContextPtr rmesa = R200_CONTEXT(ctx);
+      struct dynfn *cache = &rmesa->vb.dfn_cache.Color3fv;
+      void *target = rmesa->vb.floatcolorptr;
+
+      return r200_makeSSEAttribute3fv( cache, key, __FUNCTION__, target );
+   }
+
+   /* Any other color layout is left to the C versions. */
+   return 0;
+}
+
+/* SSE generator for Color3f.  Only produces code when color 0 of the
+ * keyed vertex format is R200_VTX_FP_RGB; returns 0 otherwise.
+ */
+static struct dynfn *r200_makeSSEColor3f( GLcontext *ctx, const int * key )
+{
+   if (VTX_COLOR(key[0],0) == R200_VTX_FP_RGB) {
+      r200ContextPtr rmesa = R200_CONTEXT(ctx);
+      struct dynfn *cache = &rmesa->vb.dfn_cache.Color3f;
+      void *target = rmesa->vb.floatcolorptr;
+
+      return r200_makeSSEAttribute3f( cache, key, __FUNCTION__, target );
+   }
+
+   /* Any other color layout is left to the C versions. */
+   return 0;
+}
+
+/* SSE generator for TexCoord2fv: a two-float pointer setter targeting
+ * rmesa->vb.texcoordptr[0], cached in dfn_cache.TexCoord2fv.
+ */
+static struct dynfn *r200_makeSSETexCoord2fv( GLcontext *ctx, const int * key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.TexCoord2fv;
+   void *target = rmesa->vb.texcoordptr[0];
+
+   return r200_makeSSEAttribute2fv( cache, key, __FUNCTION__, target );
+}
+
+/* SSE generator for TexCoord2f: a two-float setter targeting
+ * rmesa->vb.texcoordptr[0], cached in dfn_cache.TexCoord2f.
+ */
+static struct dynfn *r200_makeSSETexCoord2f( GLcontext *ctx, const int * key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.TexCoord2f;
+   void *target = rmesa->vb.texcoordptr[0];
+
+   return r200_makeSSEAttribute2f( cache, key, __FUNCTION__, target );
+}
+
+/* SSE generator for MultiTexCoord2fvARB.
+ *
+ * When texcoordptr[1] lies exactly four GLfloats after texcoordptr[0]
+ * the _sse_MultiTexCoord2fv template is used and patched with the
+ * address held in texcoordptr[0]; otherwise the _2 variant is used and
+ * patched with the address of the texcoordptr array itself.
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the chooser
+ * then falls back to the generic C version.
+ */
+static struct dynfn *r200_makeSSEMultiTexCoord2fv( GLcontext *ctx, const int * key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   if (rmesa->vb.texcoordptr[1] == rmesa->vb.texcoordptr[0]+4) {
+      DFN ( _sse_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 18, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+   } else {
+      DFN ( _sse_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);
+   }
+   return dfn;
+}
+
+/* SSE generator for MultiTexCoord2fARB.
+ *
+ * When texcoordptr[1] lies exactly four GLfloats after texcoordptr[0]
+ * the _sse_MultiTexCoord2f template is used and patched with the
+ * address held in texcoordptr[0]; otherwise the _2 variant is used and
+ * patched with the address of the texcoordptr array itself.
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the chooser
+ * then falls back to the generic C version.
+ */
+static struct dynfn *r200_makeSSEMultiTexCoord2f( GLcontext *ctx, const int * key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   if (rmesa->vb.texcoordptr[1] == rmesa->vb.texcoordptr[0]+4) {
+      DFN ( _sse_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 16, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+   } else {
+      DFN ( _sse_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 15, 0x0, (int)rmesa->vb.texcoordptr);
+   }
+   return dfn;
+}
+
+/* Install the SSE generators into gen when cpu_has_xmm is true; leave
+ * gen untouched otherwise.
+ *
+ * The generators are assigned without a cast: they have the same
+ * signature as codegen_noop, which r200InitCodegen stores in these
+ * slots directly, and converting a function pointer through void * is
+ * not ISO C and would hide any future signature mismatch.
+ */
+void r200InitSSECodegen( struct dfn_generators *gen )
+{
+   if ( cpu_has_xmm ) {
+      gen->Normal3fv = r200_makeSSENormal3fv;
+      gen->Normal3f = r200_makeSSENormal3f;
+      gen->Color3fv = r200_makeSSEColor3fv;
+      gen->Color3f = r200_makeSSEColor3f;
+      gen->TexCoord2fv = r200_makeSSETexCoord2fv;
+      gen->TexCoord2f = r200_makeSSETexCoord2f;
+      gen->MultiTexCoord2fvARB = r200_makeSSEMultiTexCoord2fv;
+      gen->MultiTexCoord2fARB = r200_makeSSEMultiTexCoord2f;
+   }
+}
+
+#else 
+
+/* Built without USE_SSE_ASM: there are no SSE generators to install,
+ * so gen is left exactly as the caller set it up.
+ */
+void r200InitSSECodegen( struct dfn_generators *gen )
+{
+   (void) gen;   /* unused */
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_vtxfmt_x86.c b/src/mesa/drivers/dri/r200/r200_vtxfmt_x86.c
new file mode 100644 (file)
index 0000000..a806eca
--- /dev/null
@@ -0,0 +1,438 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "simple_list.h" 
+#include "r200_vtxfmt.h"
+
+#if defined(USE_X86_ASM)
+
+/* EXTERN(FUNC) declares the two external symbols FUNC and FUNC_end.
+ * NOTE(review): these look like the start and end labels of assembly
+ * templates consumed by the DFN macro, which is defined outside this
+ * file -- confirm against r200_vtxfmt.h.
+ */
+#define EXTERN( FUNC )         \
+extern const char *FUNC;       \
+extern const char *FUNC##_end
+
+EXTERN ( _x86_Attribute2fv );
+EXTERN ( _x86_Attribute2f );
+EXTERN ( _x86_Attribute3fv );
+EXTERN ( _x86_Attribute3f );
+EXTERN ( _x86_Vertex3fv_6 );
+EXTERN ( _x86_Vertex3fv_8 );
+EXTERN ( _x86_Vertex3fv );
+EXTERN ( _x86_Vertex3f_4 );
+EXTERN ( _x86_Vertex3f_6 );
+EXTERN ( _x86_Vertex3f );
+EXTERN ( _x86_Color4ubv_ub );
+EXTERN ( _x86_Color4ubv_4f );
+EXTERN ( _x86_Color4ub_ub );
+EXTERN ( _x86_MultiTexCoord2fv );
+EXTERN ( _x86_MultiTexCoord2fv_2 );
+EXTERN ( _x86_MultiTexCoord2f );
+EXTERN ( _x86_MultiTexCoord2f_2 );
+
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state.  Generic x86 versions.
+ */
+
+/* x86 generator for Vertex3f.
+ *
+ * Picks a template by rmesa->vb.vertex_size (4, 6, or the general one)
+ * and patches it with the addresses of rmesa->vb.dmaptr, counter,
+ * notify and the vertex[] entries from index 3 up.  The general
+ * template is also patched with vertex_size-3.
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the chooser
+ * then falls back to the generic C version.
+ */
+struct dynfn *r200_makeX86Vertex3f( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x 0x%08x %d\n", __FUNCTION__, 
+             key[0], key[1], rmesa->vb.vertex_size );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   switch (rmesa->vb.vertex_size) {
+   case 4: {
+
+      DFN ( _x86_Vertex3f_4, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 2, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 25, 0x0, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 36, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 46, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 51, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 60, 0x0, (int)&rmesa->vb.notify);
+      break;
+   }
+   case 6: {
+
+      DFN ( _x86_Vertex3f_6, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 3, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 28, 0x0, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 34, 0x0, (int)&rmesa->vb.vertex[4]);
+      FIXUP(dfn->code, 40, 0x0, (int)&rmesa->vb.vertex[5]);
+      FIXUP(dfn->code, 57, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 63, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 70, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 79, 0x0, (int)&rmesa->vb.notify);
+      break;
+   }
+   default: {
+
+      DFN ( _x86_Vertex3f, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 3, 0x0, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 9, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 37, 0x0, rmesa->vb.vertex_size-3);
+      FIXUP(dfn->code, 44, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 50, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 56, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&rmesa->vb.notify);
+      break;
+   }
+   }
+
+   return dfn;
+}
+
+
+
+/* x86 generator for Vertex3fv.
+ *
+ * Picks a template by rmesa->vb.vertex_size (6, 8, or the general one)
+ * and patches it with the addresses of rmesa->vb.dmaptr, counter,
+ * notify and the vertex[] entries from index 3 up.  The general
+ * template is also patched with vertex_size-3.
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the chooser
+ * then falls back to the generic C version.
+ */
+struct dynfn *r200_makeX86Vertex3fv( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x 0x%08x %d\n", __FUNCTION__, 
+             key[0], key[1], rmesa->vb.vertex_size );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   switch (rmesa->vb.vertex_size) {
+   case 6: {
+
+      DFN ( _x86_Vertex3fv_6, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 1, 0x00000000, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 27, 0x0000001c, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 33, 0x00000020, (int)&rmesa->vb.vertex[4]);
+      FIXUP(dfn->code, 45, 0x00000024, (int)&rmesa->vb.vertex[5]);
+      FIXUP(dfn->code, 56, 0x00000000, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 61, 0x00000004, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 67, 0x00000004, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 76, 0x00000008, (int)&rmesa->vb.notify);
+      break;
+   }
+
+   case 8: {
+
+      DFN ( _x86_Vertex3fv_8, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 1, 0x00000000, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 27, 0x0000001c, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 33, 0x00000020, (int)&rmesa->vb.vertex[4]);
+      FIXUP(dfn->code, 45, 0x0000001c, (int)&rmesa->vb.vertex[5]);
+      FIXUP(dfn->code, 51, 0x00000020, (int)&rmesa->vb.vertex[6]);
+      FIXUP(dfn->code, 63, 0x00000024, (int)&rmesa->vb.vertex[7]);
+      FIXUP(dfn->code, 74, 0x00000000, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 79, 0x00000004, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 85, 0x00000004, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 94, 0x00000008, (int)&rmesa->vb.notify);
+      break;
+   }
+
+   default: {
+
+      DFN ( _x86_Vertex3fv, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 8, 0x01010101, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 32, 0x00000006, rmesa->vb.vertex_size-3);
+      FIXUP(dfn->code, 37, 0x00000058, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 45, 0x01010101, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 50, 0x02020202, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 58, 0x02020202, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&rmesa->vb.notify);
+      break;
+   }
+   }
+
+   return dfn;
+}
+
+/* Build a dynfn from the _x86_Attribute2fv template.
+ *
+ * cache - list passed to DFN for the new dynfn
+ * key   - vertex-format key; key[0] appears in the debug trace
+ * name  - caller's name, used only in the debug trace
+ * dest  - destination address; dest and dest+4 bytes are patched into
+ *         the code at byte offsets 11 and 16
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the choosers
+ * treat 0 as "no codegen" and use the generic C version.
+ */
+static struct dynfn *
+r200_makeX86Attribute2fv( struct dynfn * cache, const int *key,
+                         const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   DFN ( _x86_Attribute2fv, (*cache) );
+   /* assumes 32-bit pointers: dest is narrowed to int for the patches */
+   FIXUP(dfn->code, 11, 0x0, (int)dest);
+   FIXUP(dfn->code, 16, 0x4, 4+(int)dest);
+
+   return dfn;
+}
+
+/* Build a dynfn from the _x86_Attribute2f template.
+ *
+ * cache - list passed to DFN for the new dynfn
+ * key   - vertex-format key; key[0] appears in the debug trace
+ * name  - caller's name, used only in the debug trace
+ * dest  - address patched into the code at byte offset 1
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the choosers
+ * treat 0 as "no codegen" and use the generic C version.
+ */
+static struct dynfn *
+r200_makeX86Attribute2f( struct dynfn * cache, const int *key,
+                        const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   DFN ( _x86_Attribute2f, (*cache) );
+   /* assumes 32-bit pointers: dest is narrowed to int for the patch */
+   FIXUP(dfn->code, 1, 0x0, (int)dest);
+
+   return dfn;
+}
+
+
+/* Build a dynfn from the _x86_Attribute3fv template.
+ *
+ * cache - list passed to DFN for the new dynfn
+ * key   - vertex-format key; key[0] appears in the debug trace
+ * name  - caller's name, used only in the debug trace
+ * dest  - destination address; dest, dest+4 and dest+8 bytes are
+ *         patched into the code at byte offsets 14, 20 and 25
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the choosers
+ * treat 0 as "no codegen" and use the generic C version.
+ */
+static struct dynfn *
+r200_makeX86Attribute3fv( struct dynfn * cache, const int *key,
+                         const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   DFN ( _x86_Attribute3fv, (*cache) );
+   /* assumes 32-bit pointers: dest is narrowed to int for the patches */
+   FIXUP(dfn->code, 14, 0x0, (int)dest);
+   FIXUP(dfn->code, 20, 0x4, 4+(int)dest);
+   FIXUP(dfn->code, 25, 0x8, 8+(int)dest);
+
+   return dfn;
+}
+
+/* Build a dynfn from the _x86_Attribute3f template.
+ *
+ * cache - list passed to DFN for the new dynfn
+ * key   - vertex-format key; key[0] appears in the debug trace
+ * name  - caller's name, used only in the debug trace
+ * dest  - destination address; dest, dest+4 and dest+8 bytes are
+ *         patched into the code at byte offsets 14, 20 and 25
+ *
+ * Returns the new dynfn, or 0 when it cannot be allocated; the choosers
+ * treat 0 as "no codegen" and use the generic C version.
+ */
+static struct dynfn *
+r200_makeX86Attribute3f( struct dynfn * cache, const int *key,
+                        const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key[0] );
+
+   /* DFN dereferences dfn, so bail out if the allocation failed. */
+   if (dfn == 0)
+      return 0;
+
+   DFN ( _x86_Attribute3f, (*cache) );
+   /* assumes 32-bit pointers: dest is narrowed to int for the patches */
+   FIXUP(dfn->code, 14, 0x0, (int)dest);
+   FIXUP(dfn->code, 20, 0x4, 4+(int)dest);
+   FIXUP(dfn->code, 25, 0x8, 8+(int)dest);
+
+   return dfn;
+}
+
+/* x86 generator for Normal3fv: a three-float pointer setter targeting
+ * rmesa->vb.normalptr, cached in dfn_cache.Normal3fv.
+ */
+struct dynfn *r200_makeX86Normal3fv( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.Normal3fv;
+   void *target = rmesa->vb.normalptr;
+
+   return r200_makeX86Attribute3fv( cache, key, __FUNCTION__, target );
+}
+
+/*
+ * Normal3f generator: a three-component scalar-argument setter that writes
+ * to the context's normalptr and is registered in the Normal3f cache.
+ */
+struct dynfn *r200_makeX86Normal3f( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.Normal3f;
+   void *dest = rmesa->vb.normalptr;
+
+   return r200_makeX86Attribute3f( cache, key, __FUNCTION__, dest );
+}
+
+/*
+ * Color4ubv generator.
+ *
+ * When colour 0 of the vertex format in key[0] is packed RGBA, the
+ * _x86_Color4ubv_ub template is used and the colorptr address is patched
+ * in at byte offset 5.  For every other colour format the _x86_Color4ubv_4f
+ * template is used: offset 2 receives the address of
+ * _mesa_ubyte_to_float_color_tab and offsets 27/33/55/61 receive
+ * floatcolorptr + 0/4/8/12 bytes.
+ *
+ * Returns the new dynfn, or 0 if the dynfn itself could not be allocated.
+ *
+ * NOTE(review): DFN and FIXUP are macros defined elsewhere; DFN presumably
+ * copies the named template into dfn->code and links dfn into the given
+ * cache -- confirm.  Addresses are cast to int (32-bit pointers assumed).
+ */
+struct dynfn *r200_makeX86Color4ubv( GLcontext *ctx, const int *key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key[0] );
+
+   /* Everything below dereferences dfn; bail out instead of crashing. */
+   if (!dfn)
+      return 0;
+
+   if (VTX_COLOR(key[0],0) == R200_VTX_PK_RGBA) {
+      /* Packed colour: one 32-bit store straight to colorptr. */
+      DFN ( _x86_Color4ubv_ub, rmesa->vb.dfn_cache.Color4ubv);
+      FIXUP(dfn->code, 5, 0x12345678, (int)rmesa->vb.colorptr);
+   }
+   else {
+      /* Float colour: each byte is converted through the lookup table. */
+      DFN ( _x86_Color4ubv_4f, rmesa->vb.dfn_cache.Color4ubv);
+      FIXUP(dfn->code, 2, 0x00000000, (int)_mesa_ubyte_to_float_color_tab);
+      FIXUP(dfn->code, 27, 0xdeadbeaf, (int)rmesa->vb.floatcolorptr);
+      FIXUP(dfn->code, 33, 0xdeadbeaf, (int)rmesa->vb.floatcolorptr+4);
+      FIXUP(dfn->code, 55, 0xdeadbeaf, (int)rmesa->vb.floatcolorptr+8);
+      FIXUP(dfn->code, 61, 0xdeadbeaf, (int)rmesa->vb.floatcolorptr+12);
+   }
+
+   return dfn;
+}
+
+/*
+ * Color4ub generator (packed RGBA only).
+ *
+ * Returns 0 when colour 0 of the vertex format in key[0] is not packed
+ * RGBA, or when the dynfn could not be allocated.  Otherwise the
+ * _x86_Color4ub_ub template is instantiated and the four byte-store
+ * addresses at code offsets 18/24/30/36 are patched to colorptr + 0..3.
+ *
+ * NOTE(review): DFN and FIXUP are macros defined elsewhere; DFN presumably
+ * copies the template into dfn->code and links dfn into the Color4ub cache
+ * -- confirm.  Addresses are cast to int (32-bit pointers assumed).
+ */
+struct dynfn *r200_makeX86Color4ub( GLcontext *ctx, const int *key )
+{
+   struct dynfn *dfn;
+   r200ContextPtr rmesa;
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key[0] );
+
+   /* No template exists for non-packed colour formats. */
+   if (VTX_COLOR(key[0],0) != R200_VTX_PK_RGBA)
+      return 0;
+
+   dfn = MALLOC_STRUCT( dynfn );
+   if (!dfn)
+      return 0;
+
+   rmesa = R200_CONTEXT(ctx);
+
+   DFN ( _x86_Color4ub_ub, rmesa->vb.dfn_cache.Color4ub );
+   FIXUP(dfn->code, 18, 0x0, (int)rmesa->vb.colorptr);
+   FIXUP(dfn->code, 24, 0x0, (int)rmesa->vb.colorptr+1);
+   FIXUP(dfn->code, 30, 0x0, (int)rmesa->vb.colorptr+2);
+   FIXUP(dfn->code, 36, 0x0, (int)rmesa->vb.colorptr+3);
+   return dfn;
+}
+
+
+/*
+ * Color3fv generator: only available when colour 0 of the vertex format in
+ * key[0] is float RGB; returns 0 for any other colour format.  The
+ * generated code writes to floatcolorptr.
+ */
+struct dynfn *r200_makeX86Color3fv( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa;
+
+   if (VTX_COLOR(key[0],0) != R200_VTX_FP_RGB)
+      return 0;
+
+   rmesa = R200_CONTEXT(ctx);
+   return r200_makeX86Attribute3fv( &rmesa->vb.dfn_cache.Color3fv, key,
+                                    __FUNCTION__,
+                                    rmesa->vb.floatcolorptr );
+}
+
+/*
+ * Color3f generator: only available when colour 0 of the vertex format in
+ * key[0] is float RGB; returns 0 for any other colour format.  The
+ * generated code writes to floatcolorptr.
+ */
+struct dynfn *r200_makeX86Color3f( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa;
+
+   if (VTX_COLOR(key[0],0) != R200_VTX_FP_RGB)
+      return 0;
+
+   rmesa = R200_CONTEXT(ctx);
+   return r200_makeX86Attribute3f( &rmesa->vb.dfn_cache.Color3f, key,
+                                   __FUNCTION__,
+                                   rmesa->vb.floatcolorptr );
+}
+
+
+
+/*
+ * TexCoord2fv generator: a two-component "fv" setter that writes to
+ * texcoordptr[0] and is registered in the TexCoord2fv cache.
+ */
+struct dynfn *r200_makeX86TexCoord2fv( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.TexCoord2fv;
+   void *dest = rmesa->vb.texcoordptr[0];
+
+   return r200_makeX86Attribute2fv( cache, key, __FUNCTION__, dest );
+}
+
+/*
+ * TexCoord2f generator: a two-component scalar-argument setter that writes
+ * to texcoordptr[0] and is registered in the TexCoord2f cache.
+ */
+struct dynfn *r200_makeX86TexCoord2f( GLcontext *ctx, const int *key )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.TexCoord2f;
+   void *dest = rmesa->vb.texcoordptr[0];
+
+   return r200_makeX86Attribute2f( cache, key, __FUNCTION__, dest );
+}
+
+/*
+ * MultiTexCoord2fvARB generator.
+ *
+ * Two templates are available:
+ *  - when texcoordptr[1] == texcoordptr[0] + 4 (pointer arithmetic on
+ *    texcoordptr's element type), _x86_MultiTexCoord2fv is used and the
+ *    two store addresses at code offsets 21/27 are patched to
+ *    texcoordptr[0] and texcoordptr[0] + 4 bytes;
+ *  - otherwise _x86_MultiTexCoord2fv_2 is used and the address of the
+ *    texcoordptr array itself is patched in at code offset 14.
+ *
+ * Returns the new dynfn, or 0 if the dynfn itself could not be allocated.
+ *
+ * NOTE(review): DFN and FIXUP are macros defined elsewhere; DFN presumably
+ * copies the named template into dfn->code and links dfn into the cache --
+ * confirm.  Addresses are cast to int (32-bit pointers assumed).
+ */
+struct dynfn *r200_makeX86MultiTexCoord2fvARB( GLcontext *ctx, const int *key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x 0x%08x\n", __FUNCTION__, key[0], key[1] );
+
+   /* Everything below dereferences dfn; bail out instead of crashing. */
+   if (!dfn)
+      return 0;
+
+   if (rmesa->vb.texcoordptr[1] == rmesa->vb.texcoordptr[0]+4) {
+      DFN ( _x86_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 21, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+      FIXUP(dfn->code, 27, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4);
+   } else {
+      DFN ( _x86_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);
+   }
+   return dfn;
+}
+
+/*
+ * MultiTexCoord2fARB generator.
+ *
+ * Two templates are available:
+ *  - when texcoordptr[1] == texcoordptr[0] + 4 (pointer arithmetic on
+ *    texcoordptr's element type), _x86_MultiTexCoord2f is used and the
+ *    two store addresses at code offsets 20/26 are patched to
+ *    texcoordptr[0] and texcoordptr[0] + 4 bytes;
+ *  - otherwise _x86_MultiTexCoord2f_2 is used and the address of the
+ *    texcoordptr array itself is patched in at code offset 18.
+ *
+ * Returns the new dynfn, or 0 if the dynfn itself could not be allocated.
+ *
+ * NOTE(review): DFN and FIXUP are macros defined elsewhere; DFN presumably
+ * copies the named template into dfn->code and links dfn into the cache --
+ * confirm.  Addresses are cast to int (32-bit pointers assumed).
+ */
+struct dynfn *r200_makeX86MultiTexCoord2fARB( GLcontext *ctx,
+                                             const int *key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (R200_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x 0x%08x\n", __FUNCTION__, key[0], key[1] );
+
+   /* Everything below dereferences dfn; bail out instead of crashing. */
+   if (!dfn)
+      return 0;
+
+   if (rmesa->vb.texcoordptr[1] == rmesa->vb.texcoordptr[0]+4) {
+      DFN ( _x86_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 20, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+      FIXUP(dfn->code, 26, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4);
+   }
+   else {
+      /* Note: this might get generated multiple times, even though the
+       * actual emitted code is the same.
+       */
+      DFN ( _x86_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 18, 0x0, (int)rmesa->vb.texcoordptr);
+   }
+   return dfn;
+}
+
+
+/*
+ * Fill in the x86 code generators this file implements.  Entries of *gen
+ * that are not assigned here are left as the caller set them.
+ */
+void r200InitX86Codegen( struct dfn_generators *gen )
+{
+   /* Position */
+   gen->Vertex3f  = r200_makeX86Vertex3f;
+   gen->Vertex3fv = r200_makeX86Vertex3fv;
+
+   /* Normal */
+   gen->Normal3f  = r200_makeX86Normal3f;
+   gen->Normal3fv = r200_makeX86Normal3fv;
+
+   /* Colour: Color3f/Color3fv need float RGB; Color4ub needs packed RGBA
+    * (its generator returns 0 otherwise); Color4ubv has a packed and a
+    * float template.
+    */
+   gen->Color3f   = r200_makeX86Color3f;
+   gen->Color3fv  = r200_makeX86Color3fv;
+   gen->Color4ub  = r200_makeX86Color4ub;
+   gen->Color4ubv = r200_makeX86Color4ubv;
+
+   /* Texture coordinates */
+   gen->TexCoord2f          = r200_makeX86TexCoord2f;
+   gen->TexCoord2fv         = r200_makeX86TexCoord2fv;
+   gen->MultiTexCoord2fARB  = r200_makeX86MultiTexCoord2fARB;
+   gen->MultiTexCoord2fvARB = r200_makeX86MultiTexCoord2fvARB;
+
+   /* Not done:
+    *   Vertex2f, Vertex2fv,
+    *   Color3ub, Color3ubv, Color4f, Color4fv,
+    *   TexCoord1f, TexCoord1fv,
+    *   MultiTexCoord1fARB, MultiTexCoord1fvARB
+    */
+}
+
+
+#else 
+
+/* Variant for the #else branch of the enclosing conditional: installs no
+ * generators and leaves *gen untouched.
+ */
+void r200InitX86Codegen( struct dfn_generators *gen )
+{
+   (void)gen;   /* deliberately unused */
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_vtxtmp_x86.S b/src/mesa/drivers/dri/r200/r200_vtxtmp_x86.S
new file mode 100644 (file)
index 0000000..4980ce1
--- /dev/null
@@ -0,0 +1,410 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_vtxtmp_x86.S,v 1.1 2002/10/30 12:51:53 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#define GLOBL( x )     \
+.globl x;              \
+x:
+
+.data          /* The routines below are assembled into .data.  Each one is
+                * bracketed by GLOBL( name ) and GLOBL( name_end ); GLOBL
+                * exports the symbol and defines it as a label.
+                *
+                * 32-bit x86, AT&T syntax; arguments are read from the
+                * stack starting at 4(%esp).  The absolute addresses and
+                * immediates (0, 4, 8, 0x12345678, 0xdeadbeef, ...) look
+                * like placeholders: r200_vtxfmt_x86.c applies
+                * FIXUP(dfn->code, byte offset, old value, new value) to
+                * code obtained through its DFN macro, which presumably
+                * copies a template (TODO confirm, DFN is not visible
+                * here).  Fixups are byte offsets, so instruction
+                * encodings in this file must not change.
+                *
+                * NOTE(review): the generators visible in
+                * r200_vtxfmt_x86.c reference _x86_Attribute2fv/2f/3fv/3f
+                * and _x86_MultiTexCoord2fv/2f (and their _2 variants);
+                * none of those symbols is defined in this file.  Confirm
+                * which template file is meant to be linked.
+                */
+.align 4
+GLOBL( _x86_Normal3fv)         /* copy the 3 dwords at v to placeholder addresses 0/4/8 */
+       movl 4(%esp), %eax      /* load 'v' off stack */
+       movl (%eax), %ecx       /* load v[0] */
+       movl 4(%eax), %edx      /* load v[1] */
+       movl 8(%eax), %eax      /* load v[2]; eax is no longer needed as pointer */
+       movl %ecx, 0            /* store v[0] to current vertex (address 0 is a placeholder) */
+       movl %edx, 4            /* store v[1] to current vertex (address 4 is a placeholder) */
+       movl %eax, 8            /* store v[2] to current vertex (address 8 is a placeholder) */
+       ret
+GLOBL ( _x86_Normal3fv_end )
+
+/*
+       vertex 3f vertex size 4
+
+       Writes 4 dwords through a pointer loaded from a placeholder
+       address: the three dword arguments at 4/8/12(%esp) followed by
+       one dword read from a placeholder address.  The pointer advanced
+       by 16 bytes and a decremented counter are then stored to
+       placeholder addresses.  If the counter reached zero the routine
+       jumps indirectly through a placeholder address instead of
+       returning.
+
+       Every "(0)" / "*0" operand is a placeholder; the code that
+       patches this template is not visible here, so which operands end
+       up sharing an address is an assumption to confirm.
+*/
+       
+GLOBL ( _x86_Vertex3f_4 )
+       movl    (0), %ecx               /* ecx = write pointer (placeholder address) */
+       movl    4(%esp), %eax           /* first argument */
+       movl    8(%esp), %edx           /* second argument */
+       movl    %eax, (%ecx)            /* out[0] = first argument */
+       movl    %edx, 4(%ecx)           /* out[1] = second argument */
+       movl    12(%esp), %eax          /* third argument */
+       movl    (0), %edx               /* fourth dword comes from a placeholder address */
+       movl    %eax, 8(%ecx)           /* out[2] = third argument */
+       movl    %edx, 12(%ecx)          /* out[3] = dword loaded above */
+       movl    (0), %eax               /* eax = counter (placeholder address) */
+       addl    $16, %ecx               /* step past the 16 bytes just written */
+       dec     %eax                    /* counter - 1; ZF set when it reaches zero */
+       movl    %ecx, (0)               /* store advanced pointer (placeholder address) */
+       movl    %eax, (0)               /* store counter; mov leaves ZF from dec intact */
+       je      .1                      /* counter reached zero */
+       ret
+.1:    jmp     *0                      /* indirect jump through a placeholder address */
+       
+GLOBL ( _x86_Vertex3f_4_end )
+
+/*
+       vertex 3f vertex size 6
+
+       Writes 6 dwords through a pointer loaded from a placeholder
+       address: the three dword arguments followed by three dwords
+       read from placeholder addresses.  The pointer advanced by 24
+       bytes and a decremented counter are stored to placeholder
+       addresses; if the counter reached zero the routine jumps
+       indirectly through a placeholder address instead of returning.
+
+       %edi is saved and restored.  Because of that push the arguments
+       sit at 8/12/16(%esp).  Every "(0)" / "*0" operand is a
+       placeholder; the patching code is not visible here (TODO
+       confirm which operands share an address).
+*/
+GLOBL ( _x86_Vertex3f_6 )
+       push    %edi                    /* preserve edi; arguments move up by 4 */
+       movl    (0), %edi               /* edi = write pointer (placeholder address) */
+       movl    8(%esp), %eax           /* first argument */
+       movl    12(%esp), %edx          /* second argument */
+       movl    16(%esp), %ecx          /* third argument */
+       movl    %eax, (%edi)            /* out[0] */
+       movl    %edx, 4(%edi)           /* out[1] */
+       movl    %ecx, 8(%edi)           /* out[2] */
+       movl    (0), %eax               /* three more dwords from placeholder addresses */
+       movl    (0), %edx
+       movl    (0), %ecx
+       movl    %eax, 12(%edi)          /* out[3] */
+       movl    %edx, 16(%edi)          /* out[4] */
+       movl    %ecx, 20(%edi)          /* out[5] */
+       addl    $24, %edi               /* step past the 24 bytes just written */
+       movl    (0), %eax               /* eax = counter (placeholder address) */
+       movl    %edi, (0)               /* store advanced pointer (placeholder address) */
+       dec     %eax                    /* counter - 1; ZF set when it reaches zero */
+       pop     %edi                    /* restore edi; pop does not touch flags */
+       movl    %eax, (0)               /* store counter; mov does not touch flags */
+       je      .2                      /* counter reached zero */
+       ret
+.2:    jmp     *0                      /* indirect jump through a placeholder address */
+GLOBL ( _x86_Vertex3f_6_end )
+/*
+       vertex 3f generic size
+
+       Writes the three dword arguments through a pointer loaded from
+       a placeholder address, then appends %ecx dwords copied from the
+       address in %esi with "rep movsl".  Both the source address and
+       the dword count are immediate placeholders ($0).  The advanced
+       pointer and a decremented counter are stored to placeholder
+       addresses; if the counter reached zero the routine jumps
+       indirectly through a placeholder address instead of returning.
+
+       %edi and %esi are saved and restored.  Because of the two
+       pushes the arguments sit at 12/16/20(%esp).  The patching code
+       is not visible here (TODO confirm what each placeholder
+       becomes).
+*/
+GLOBL ( _x86_Vertex3f )
+       push    %edi                    /* preserve edi */
+       push    %esi                    /* preserve esi; arguments are now at 12(%esp)+ */
+       movl    $0, %esi                /* esi = copy source (placeholder immediate) */
+       movl    (0), %edi               /* edi = write pointer (placeholder address) */
+       movl    12(%esp), %eax          /* first argument */
+       movl    16(%esp), %edx          /* second argument */
+       movl    20(%esp), %ecx          /* third argument */
+       movl    %eax, (%edi)            /* out[0] */
+       movl    %edx, 4(%edi)           /* out[1] */
+       movl    %ecx, 8(%edi)           /* out[2] */
+       addl    $12, %edi               /* continue after the three dwords */
+       movl    $0, %ecx                /* ecx = number of dwords to append (placeholder immediate) */
+       repz                            /* repeat prefix for the movsl on the next line */
+       movsl %ds:(%esi), %es:(%edi)    /* copy ecx dwords from (esi) to (edi), advancing both */
+       movl    (0), %eax               /* eax = counter (placeholder address) */
+       movl    %edi, (0)               /* store advanced pointer (placeholder address) */
+       dec     %eax                    /* counter - 1; ZF set when it reaches zero */
+       movl    %eax, (0)               /* store counter; mov does not touch flags */
+       pop     %esi                    /* restore esi; pop does not touch flags */
+       pop     %edi                    /* restore edi */
+       je      .3                      /* counter reached zero */
+       ret
+.3:    jmp     *0                      /* indirect jump through a placeholder address */
+
+GLOBL ( _x86_Vertex3f_end )
+
+/*
+       Vertex 3fv vertex size 6
+
+       Takes one pointer argument v at 4(%esp).  Writes 6 dwords
+       through the pointer stored at placeholder address 0: v[0..2]
+       followed by the dwords at placeholder addresses 28, 32 and 36.
+       The pointer advanced by 24 bytes is stored back to address 0,
+       the counter at placeholder address 4 is decremented, and if it
+       reached zero the routine jumps indirectly through placeholder
+       address 8 instead of returning.
+
+       The numeric addresses are placeholders; the code that patches
+       them is not visible here (TODO confirm).
+*/
+GLOBL ( _x86_Vertex3fv_6 )
+       movl    (0), %eax               /* eax = write pointer (placeholder address 0) */
+       movl    4(%esp), %ecx           /* ecx = v */
+       movl    (%ecx), %edx            /* v[0] */
+       movl    %edx, (%eax)            /* out[0] = v[0] */
+       movl    4(%ecx), %edx           /* v[1] */
+       movl    8(%ecx), %ecx           /* v[2]; ecx is no longer needed as pointer */
+       movl    %edx, 4(%eax)           /* out[1] = v[1] */
+       movl    %ecx, 8(%eax)           /* out[2] = v[2] */
+       movl    (28), %edx              /* extra dwords from placeholder addresses 28/32 */
+       movl    (32), %ecx
+       movl    %edx, 12(%eax)          /* out[3] */
+       movl    %ecx, 16(%eax)          /* out[4] */
+       movl    (36), %edx              /* extra dword from placeholder address 36 */
+       movl    %edx, 20(%eax)          /* out[5] */
+       addl    $24, %eax               /* step past the 24 bytes just written */
+       movl    %eax, 0                 /* store advanced pointer (placeholder address 0) */
+       movl    4, %eax                 /* eax = counter (placeholder address 4) */
+       dec     %eax                    /* counter - 1; ZF set when it reaches zero */
+       movl    %eax, 4                 /* store counter; mov does not touch flags */
+       je      .4                      /* counter reached zero */
+       ret
+.4:    jmp    *8                       /* indirect jump through placeholder address 8 */
+       
+GLOBL ( _x86_Vertex3fv_6_end )
+
+/*
+       Vertex 3fv vertex size 8
+
+       Takes one pointer argument v at 4(%esp).  Writes 8 dwords
+       through the pointer stored at placeholder address 0: v[0..2],
+       then two pairs loaded from placeholder addresses 28/32, then
+       one dword from placeholder address 36.  The pointer advanced by
+       32 bytes is stored back to address 0, the counter at
+       placeholder address 4 is decremented, and if it reached zero
+       the routine jumps indirectly through placeholder address 8
+       instead of returning.
+
+       Both pairs use the same placeholder values 28/32; whether they
+       are patched to the same or to different addresses cannot be
+       determined from this file (TODO confirm against the generator).
+*/
+GLOBL ( _x86_Vertex3fv_8 )
+       movl    (0), %eax               /* eax = write pointer (placeholder address 0) */
+       movl    4(%esp), %ecx           /* ecx = v */
+       movl    (%ecx), %edx            /* v[0] */
+       movl    %edx ,(%eax)            /* out[0] = v[0] */
+       movl    4(%ecx) ,%edx           /* v[1] */
+       movl    8(%ecx) ,%ecx           /* v[2]; ecx is no longer needed as pointer */
+       movl    %edx, 4(%eax)           /* out[1] = v[1] */
+       movl    %ecx, 8(%eax)           /* out[2] = v[2] */
+       movl    (28), %edx              /* first pair from placeholder addresses 28/32 */
+       movl    (32), %ecx
+       movl    %edx, 12(%eax)          /* out[3] */
+       movl    %ecx, 16(%eax)          /* out[4] */
+       movl    (28), %edx              /* second pair, same placeholder values */
+       movl    (32), %ecx
+       movl    %edx, 20(%eax)          /* out[5] */
+       movl    %ecx, 24(%eax)          /* out[6] */
+       movl    (36), %edx              /* last dword from placeholder address 36 */
+       movl    %edx, 28(%eax)          /* out[7] */
+       addl    $32, %eax               /* step past the 32 bytes just written */
+       movl    %eax, (0)               /* store advanced pointer (placeholder address 0) */
+       movl    4, %eax                 /* eax = counter (placeholder address 4) */
+       dec     %eax                    /* counter - 1; ZF set when it reaches zero */
+       movl    %eax, (4)               /* store counter; mov does not touch flags */
+       je      .5                      /* counter reached zero */
+       ret
+.5:    jmp    *8                       /* indirect jump through placeholder address 8 */
+       
+GLOBL ( _x86_Vertex3fv_8_end )
+
+/*
+       Vertex 3fv generic vertex size
+
+       Takes one pointer argument v at 4(%esp), read before the
+       pushes.  Writes v[0..2] through the pointer stored at
+       placeholder address 0x1010101, then appends %ecx dwords copied
+       from the address in %esi with "rep movsl".  The count ($6) and
+       source ($0x58) immediates are presumably placeholders as well
+       (TODO confirm).  The advanced pointer is stored back to
+       0x1010101, the counter at placeholder address 0x2020202 is
+       decremented, and if it reached zero the routine jumps
+       indirectly through a placeholder address instead of returning.
+
+       %edi and %esi are saved and restored.  The patching code is
+       not visible here.
+*/
+GLOBL ( _x86_Vertex3fv )
+       movl    4(%esp), %edx           /* edx = v (fetched before esp moves) */
+       push    %edi                    /* preserve edi */
+       push    %esi                    /* preserve esi */
+       movl    (0x1010101), %edi       /* edi = write pointer (placeholder address) */
+       movl    (%edx), %eax            /* v[0] */
+       movl    4(%edx), %ecx           /* v[1] */
+       movl    8(%edx), %esi           /* v[2] */
+       movl    %eax, (%edi)            /* out[0] = v[0] */
+       movl    %ecx, 4(%edi)           /* out[1] = v[1] */
+       movl    %esi, 8(%edi)           /* out[2] = v[2] */
+       addl    $12, %edi               /* continue after the three dwords */
+       movl    $6, %ecx                /* ecx = number of dwords to append */
+       movl    $0x58, %esi             /* esi = copy source address */
+       repz                            /* repeat prefix for the movsl on the next line */
+       movsl %ds:(%esi), %es:(%edi)    /* copy ecx dwords from (esi) to (edi), advancing both */
+       movl    %edi, (0x1010101)       /* store advanced pointer */
+       movl    (0x2020202), %eax       /* eax = counter (placeholder address) */
+       pop     %esi                    /* restore esi */
+       pop     %edi                    /* restore edi */
+       dec     %eax                    /* counter - 1; ZF set when it reaches zero */
+       movl    %eax, (0x2020202)       /* store counter; mov does not touch flags */
+       je      .6                      /* counter reached zero */
+       ret
+.6:    jmp    *0                       /* indirect jump through a placeholder address */
+GLOBL ( _x86_Vertex3fv_end )
+
+/*
+       Normal 3f
+
+       Stores the three dword arguments at 4/8/12(%esp) to offsets
+       0/4/8 from the address held in %edx.  The immediate 0x12345678
+       is a placeholder for that destination address.
+*/
+GLOBL ( _x86_Normal3f )
+       movl    $0x12345678, %edx       /* edx = destination (placeholder immediate) */
+       movl    4(%esp), %eax           /* first argument */
+       movl    %eax, (%edx)            /* dest[0] */
+       movl    8(%esp), %eax           /* second argument */
+       movl    %eax, 4(%edx)           /* dest[1] */
+       movl    12(%esp), %eax          /* third argument */
+       movl    %eax, 8(%edx)           /* dest[2] */
+       ret
+GLOBL ( _x86_Normal3f_end )
+
+/*
+       Color 4ubv_ub
+
+       Takes one pointer argument at 4(%esp) and copies the 4 bytes
+       it points to, as a single dword, to the address held in %edx.
+       The immediate 0x12345678 is a placeholder: r200_makeX86Color4ubv
+       patches the dword at code offset 5 with rmesa->vb.colorptr.
+*/
+GLOBL ( _x86_Color4ubv_ub )
+       movl 4(%esp), %eax              /* eax = pointer argument */
+       movl $0x12345678, %edx          /* edx = destination (placeholder, patched at offset 5) */
+       movl (%eax), %eax               /* load the four colour bytes as one dword */
+       movl %eax, (%edx)               /* store them to the destination */
+       ret
+GLOBL ( _x86_Color4ubv_ub_end )
+
+/*
+       Color 4ubv 4f
+
+       Takes one pointer argument (at 8(%esp) after the push) to four
+       bytes.  Each byte indexes a table of 4-byte entries whose base
+       is in %edx, and the looked-up dword is stored to an absolute
+       address.
+
+       Placeholders, as patched by r200_makeX86Color4ubv:
+         "$0" in the movl to %edx (code offset 2)
+                 -> address of _mesa_ubyte_to_float_color_tab
+         the four 0xdeadbeaf store addresses (offsets 27/33/55/61)
+                 -> rmesa->vb.floatcolorptr + 0/4/8/12 bytes
+
+       %ebx is saved and restored.
+*/
+GLOBL ( _x86_Color4ubv_4f )
+       push    %ebx                    /* preserve ebx; argument is now at 8(%esp) */
+       movl    $0, %edx                /* edx = lookup table base (placeholder immediate) */
+       xor     %eax, %eax              /* clear index registers so only the low byte is set below */
+       xor     %ecx, %ecx
+       movl    8(%esp), %ebx           /* ebx = pointer argument */
+       movl    (%ebx), %ebx            /* ebx = the four bytes as one dword */
+       mov     %bl, %al                /* eax = byte 0 */
+       mov     %bh, %cl                /* ecx = byte 1 */
+       movl    (%edx,%eax,4),%eax      /* table[byte 0] */
+       movl    (%edx,%ecx,4),%ecx      /* table[byte 1] */
+       movl    %eax, (0xdeadbeaf)      /* store component 0 (placeholder address) */
+       movl    %ecx, (0xdeadbeaf)      /* store component 1 (placeholder address) */
+       xor     %eax, %eax              /* clear index registers again */
+       xor     %ecx, %ecx
+       shr     $16, %ebx               /* bring bytes 2 and 3 down into bl/bh */
+       mov     %bl, %al                /* eax = byte 2 */
+       mov     %bh, %cl                /* ecx = byte 3 */
+       movl    (%edx,%eax,4), %eax     /* table[byte 2] */
+       movl    (%edx,%ecx,4), %ecx     /* table[byte 3] */
+       movl    %eax, (0xdeadbeaf)      /* store component 2 (placeholder address) */
+       movl    %ecx, (0xdeadbeaf)      /* store component 3 (placeholder address) */
+       pop     %ebx                    /* restore ebx */
+       ret
+GLOBL ( _x86_Color4ubv_4f_end )
+
+/*
+
+       Color4ub_ub
+
+       Takes four arguments (at 8/12/16/20(%esp) after the push) and
+       stores the low byte of each to an absolute address.  The four
+       "(0)" addresses are placeholders: r200_makeX86Color4ub patches
+       code offsets 18/24/30/36 with rmesa->vb.colorptr + 0/1/2/3.
+
+       %ebx is saved and restored.
+*/
+GLOBL( _x86_Color4ub_ub )
+       push    %ebx                    /* preserve ebx; arguments are now at 8(%esp)+ */
+       movl    8(%esp), %eax           /* first argument */
+       movl    12(%esp), %edx          /* second argument */
+       movl    16(%esp), %ecx          /* third argument */
+       movl    20(%esp), %ebx          /* fourth argument */
+       mov     %al, (0)                /* byte 0 (placeholder address) */
+       mov     %dl, (0)                /* byte 1 (placeholder address) */
+       mov     %cl, (0)                /* byte 2 (placeholder address) */
+       mov     %bl, (0)                /* byte 3 (placeholder address) */
+       pop     %ebx                    /* restore ebx */
+       ret
+GLOBL( _x86_Color4ub_ub_end )
+
+/*
+       Color3fv_3f
+
+       Takes one pointer argument at 4(%esp) and copies the three
+       dwords it points to to offsets 0/4/8 from the address held in
+       %edx.  The immediate "$0" is a placeholder for that destination
+       address.
+*/
+GLOBL( _x86_Color3fv_3f )
+       movl    4(%esp), %eax           /* eax = pointer argument */
+       movl    $0, %edx                /* edx = destination (placeholder immediate) */
+       movl    (%eax), %ecx            /* dest[0] = src[0] */
+       movl    %ecx, (%edx)
+       movl    4(%eax), %ecx           /* dest[1] = src[1] */
+       movl    %ecx, 4(%edx)
+       movl    8(%eax), %ecx           /* dest[2] = src[2] */
+       movl    %ecx, 8(%edx)
+       ret
+GLOBL( _x86_Color3fv_3f_end )
+
+/*
+       Color3f_3f
+
+       Stores the three dword arguments at 4/8/12(%esp) to offsets
+       0/4/8 from the address held in %edx.  The immediate 0x12345678
+       is a placeholder for that destination address.
+*/
+GLOBL( _x86_Color3f_3f )
+       movl    $0x12345678, %edx       /* edx = destination (placeholder immediate) */
+       movl    4(%esp), %eax           /* first argument */
+       movl    %eax, (%edx)            /* dest[0] */
+       movl    8(%esp,1), %eax         /* second argument; "(%esp,1)" is presumably just 8(%esp) in disassembler spelling -- confirm */
+       movl    %eax, 4(%edx)           /* dest[1] */
+       movl    12(%esp), %eax          /* third argument */
+       movl    %eax, 8(%edx)           /* dest[2] */
+       ret
+GLOBL( _x86_Color3f_3f_end )
+
+/*
+       TexCoord2fv
+
+       Takes one pointer argument at 4(%esp) and copies the two dwords
+       it points to to offsets 0/4 from the address held in %edx.  The
+       immediate 0x12345678 is a placeholder for that destination
+       address.
+*/
+
+GLOBL( _x86_TexCoord2fv )
+       movl    4(%esp), %eax           /* eax = pointer argument */
+       movl    $0x12345678, %edx       /* edx = destination (placeholder immediate) */
+       movl    (%eax), %ecx            /* src[0] */
+       movl    4(%eax), %eax           /* src[1]; eax is no longer needed as pointer */
+       movl    %ecx, (%edx)            /* dest[0] */
+       movl    %eax, 4(%edx)           /* dest[1] */
+       ret
+
+GLOBL( _x86_TexCoord2fv_end )
+/*
+       TexCoord2f
+
+       Stores the two dword arguments at 4/8(%esp) to offsets 0/4
+       from the address held in %edx.  The immediate 0x12345678 is a
+       placeholder for that destination address.
+*/
+GLOBL( _x86_TexCoord2f )
+       movl    $0x12345678, %edx       /* edx = destination (placeholder immediate) */
+       movl    4(%esp), %eax           /* first argument */
+       movl    8(%esp), %ecx           /* second argument */
+       movl    %eax, (%edx)            /* dest[0] */
+       movl    %ecx, 4(%edx)           /* dest[1] */
+       ret
+GLOBL( _x86_TexCoord2f_end )
+
+/*
+       MultiTexCoord2fvARB st0/st1
+
+       Arguments: texture target at 4(%esp), pointer v at 8(%esp).
+       Computes ((target - 0x84c0) & 1) * 8 as a byte offset (0x84c0
+       is the value of GL_TEXTURE0_ARB) and stores v[0] and v[1] at
+       that offset from two placeholder base addresses (0xdeadbeef).
+       The two stores are presumably patched to a base and base + 4
+       (TODO confirm; the matching generator is not visible here).
+       This variant therefore expects the two units' coordinates to
+       be 8 bytes apart.
+*/
+GLOBL( _x86_MultiTexCoord2fvARB )
+
+       movl    4(%esp), %eax           /* eax = texture target */
+       movl    8(%esp), %ecx           /* ecx = v */
+       sub     $0x84c0, %eax           /* target - GL_TEXTURE0_ARB */
+       and     $1, %eax                /* keep only unit 0 or 1 */
+       movl    (%ecx), %edx            /* v[0] */
+       shl     $3, %eax                /* unit * 8 = byte offset */
+       movl    4(%ecx), %ecx           /* v[1]; ecx is no longer needed as pointer */
+       movl    %edx, 0xdeadbeef(%eax)  /* store v[0] (placeholder base address) */
+       movl    %ecx, 0xdeadbeef(%eax)  /* store v[1] (placeholder base address) */
+       ret
+GLOBL( _x86_MultiTexCoord2fvARB_end )
+/*
+       MultiTexCoord2fvARB, pointer-table variant
+
+       Arguments: texture target at 4(%esp), pointer v at 8(%esp).
+       Computes (target - 0x84c0) & 1 (0x84c0 is the value of
+       GL_TEXTURE0_ARB), uses it to pick a destination pointer from a
+       table of dword pointers, and stores v[0] and v[1] through that
+       pointer.  The table base "0" in 0(,%eax,4) is a placeholder
+       (the matching generator is not visible here -- TODO confirm).
+*/
+
+GLOBL( _x86_MultiTexCoord2fvARB_2 )
+       movl    4(%esp,1), %eax         /* eax = texture target */
+       movl    8(%esp,1), %ecx         /* ecx = v */
+       sub     $0x84c0, %eax           /* target - GL_TEXTURE0_ARB */
+       and     $0x1, %eax              /* keep only unit 0 or 1 */
+       movl    0(,%eax,4), %edx        /* edx = table[unit] (placeholder table base) */
+       movl    (%ecx), %eax            /* v[0] */
+       movl    %eax, (%edx)            /* dest[0] = v[0] */
+       movl    4(%ecx), %eax           /* v[1] */
+       movl    %eax, 4(%edx)           /* dest[1] = v[1] */
+       ret
+
+GLOBL( _x86_MultiTexCoord2fvARB_2_end )
+
+/*
+       MultiTexCoord2fARB st0/st1
+
+       Arguments: texture target at 4(%esp), two dword coordinates at
+       8/12(%esp).  Computes ((target - 0x84c0) & 1) * 8 as a byte
+       offset (0x84c0 is the value of GL_TEXTURE0_ARB) and stores the
+       two coordinates at that offset from two placeholder base
+       addresses (0xdeadbeef).  The two stores are presumably patched
+       to a base and base + 4 (TODO confirm; the matching generator is
+       not visible here).
+*/
+GLOBL( _x86_MultiTexCoord2fARB )
+       movl    4(%esp), %eax           /* eax = texture target */
+       movl    8(%esp), %edx           /* first coordinate */
+       sub     $0x84c0, %eax           /* target - GL_TEXTURE0_ARB */
+       movl    12(%esp), %ecx          /* second coordinate */
+       and     $1, %eax                /* keep only unit 0 or 1 */
+       shl     $3, %eax                /* unit * 8 = byte offset */
+       movl    %edx, 0xdeadbeef(%eax)  /* store first coordinate (placeholder base address) */
+       movl    %ecx, 0xdeadbeef(%eax)  /* store second coordinate (placeholder base address) */
+       ret
+GLOBL( _x86_MultiTexCoord2fARB_end )
+
+/*
+       MultiTexCoord2fARB, pointer-table variant
+
+       Arguments: texture target at 4(%esp), two dword coordinates at
+       8/12(%esp).  Computes (target - 0x84c0) & 1 (0x84c0 is the
+       value of GL_TEXTURE0_ARB), uses it to pick a destination
+       pointer from a table of dword pointers, and stores the two
+       coordinates through that pointer.  The table base "0" in
+       0(,%eax,4) is a placeholder (the matching generator is not
+       visible here -- TODO confirm).
+*/
+GLOBL( _x86_MultiTexCoord2fARB_2 )
+       movl    4(%esp), %eax           /* eax = texture target */
+       movl    8(%esp), %edx           /* first coordinate */
+       sub     $0x84c0, %eax           /* target - GL_TEXTURE0_ARB */
+       movl    12(%esp,1), %ecx        /* second coordinate */
+       and     $1,%eax                 /* keep only unit 0 or 1 */
+       movl    0(,%eax,4), %eax        /* eax = table[unit] (placeholder table base) */
+       movl    %edx, (%eax)            /* dest[0] = first coordinate */
+       movl    %ecx, 4(%eax)           /* dest[1] = second coordinate */
+       ret
+GLOBL( _x86_MultiTexCoord2fARB_2_end )