nvc0: import nvc0 gallium driver
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Fri, 12 Nov 2010 14:17:40 +0000 (15:17 +0100)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Fri, 12 Nov 2010 14:17:40 +0000 (15:17 +0100)
48 files changed:
configure.ac
src/gallium/drivers/nouveau/nouveau_screen.h
src/gallium/drivers/nouveau/nouveau_winsys.h
src/gallium/drivers/nouveau/nv_object.xml.h
src/gallium/drivers/nvc0/Makefile [new file with mode: 0644]
src/gallium/drivers/nvc0/SConscript [new file with mode: 0644]
src/gallium/drivers/nvc0/nv50_defs.xml.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nv50_texture.xml.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_2d.xml.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_3d.xml.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_buffer.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_context.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_context.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_draw.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_fence.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_fence.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_formats.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_graph_macros.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_m2mf.xml.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_miptree.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_pc.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_pc.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_pc_emit.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_pc_optimize.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_pc_print.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_pc_regalloc.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_program.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_program.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_push.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_push2.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_resource.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_resource.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_screen.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_screen.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_shader_state.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_state.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_state_validate.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_stateobj.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_surface.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_tex.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_transfer.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_transfer.h [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_vbo.c [new file with mode: 0644]
src/gallium/drivers/nvc0/nvc0_winsys.h [new file with mode: 0644]
src/gallium/targets/dri-nouveau/Makefile
src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c

index b43a9fd1e86abe59fcd740db8c7e9a2b3e037e99..432ea7fc44e703ae0497ab7cde53baff0012010a 100644 (file)
@@ -1690,7 +1690,7 @@ AC_ARG_ENABLE([gallium-nouveau],
     [enable_gallium_nouveau="$enableval"],
     [enable_gallium_nouveau=no])
 if test "x$enable_gallium_nouveau" = xyes; then
-    GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50"
+    GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50 nvc0"
     gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau"
 fi
 
index 8c290273fb4597537cefb0940378af01083b4b47..1f4e5171c01f351f53be46a5d5347beff15dcf26 100644 (file)
@@ -66,7 +66,7 @@ void nouveau_screen_fini(struct nouveau_screen *);
 
 
 
-
+#ifndef NOUVEAU_NVC0
 static INLINE unsigned
 RING_3D(unsigned mthd, unsigned size)
 {
@@ -78,5 +78,6 @@ RING_3D_NI(unsigned mthd, unsigned size)
 {
        return 0x40000000 | (7 << 13) | (size << 18) | mthd;
 }
+#endif
 
 #endif
index ab480cabd09d9259bc58567a371fd648f833da52..e7acbe6c3d30d4d17edd1422b02dee2e19a742ac 100644 (file)
@@ -10,7 +10,9 @@
 #include "nouveau/nouveau_grobj.h"
 #include "nouveau/nouveau_notifier.h"
 #include "nouveau/nouveau_resource.h"
+#ifndef NOUVEAU_NVC0
 #include "nouveau/nouveau_pushbuf.h"
+#endif
 
 #ifndef NV04_PFIFO_MAX_PACKET_LEN
 #define NV04_PFIFO_MAX_PACKET_LEN 2047
@@ -41,4 +43,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
 extern struct pipe_screen *
 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
 
+extern struct pipe_screen *
+nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
+
 #endif
index cb7653c3fe2f0dfb0aaf6a8724c436e4713c3074..a5b0d0478c8c4ac51a70d0ee87e56753b94bccc5 100644 (file)
@@ -8,12 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- nv30-40_3d.xml (  31709 bytes, from 2010-09-05 07:53:14)
-- copyright.xml  (   6503 bytes, from 2010-04-10 23:15:50)
-- nv_3ddefs.xml  (  15193 bytes, from 2010-09-05 07:50:15)
-- nv_defs.xml    (   4437 bytes, from 2010-08-05 19:38:53)
-- nv_object.xml  (  10424 bytes, from 2010-08-05 19:38:53)
-- nvchipsets.xml (   2824 bytes, from 2010-08-05 19:38:53)
+- nv_object.xml  (  11547 bytes, from 2010-10-24 15:29:34)
+- copyright.xml  (   6498 bytes, from 2010-10-03 13:18:37)
+- nvchipsets.xml (   2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml    (   4437 bytes, from 2010-07-06 07:43:58)
 
 Copyright (C) 2006-2010 by the following authors:
 - Artur Huillet <arthur.huillet@free.fr> (ahuillet)
@@ -37,7 +35,7 @@ Copyright (C) 2006-2010 by the following authors:
 - Mark Carey <mark.carey@gmail.com> (careym)
 - Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
 - nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
-- Patrice Mandin <mandin.patrice@orange.fr> (pmandin, pmdata)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
 - Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
 - Peter Popov <ironpeter@users.sf.net> (ironpeter)
 - Richard Hughes <hughsient@users.sf.net> (hughsient)
@@ -180,6 +178,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NV50_COMPUTE                                           0x000050c0
 #define NVA3_COMPUTE                                           0x000085c0
 #define NVC0_COMPUTE                                           0x000090c0
+#define NV84_CRYPT                                             0x000074c1
 #define NV01_SUBCHAN__SIZE                                     0x00002000
 #define NV01_SUBCHAN                                           0x00000000
 
@@ -194,9 +193,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define NV84_SUBCHAN_QUERY_GET                                 0x0000001c
 
-#define NV84_SUBCHAN_UNK20                                     0x00000020
+#define NV84_SUBCHAN_QUERY_INTR                                        0x00000020
 
-#define NV84_SUBCHAN_UNK24                                     0x00000024
+#define NV84_SUBCHAN_WRCACHE_FLUSH                             0x00000024
 
 #define NV10_SUBCHAN_REF_CNT                                   0x00000050
 
@@ -209,7 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define NV11_SUBCHAN_SEMAPHORE_RELEASE                         0x0000006c
 
-#define NV50_SUBCHAN_UNK80                                     0x00000080
+#define NV40_SUBCHAN_YIELD                                     0x00000080
 
 #define NV01_GRAPH                                             0x00000000
 
@@ -227,5 +226,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define NV40_GRAPH_PM_TRIGGER                                  0x00000140
 
+#define NVC0_SUBCHAN__SIZE                                     0x00008000
+#define NVC0_SUBCHAN                                           0x00000000
+
+#define NVC0_SUBCHAN_OBJECT                                    0x00000000
+
+
+#define NVC0_SUBCHAN_QUERY_ADDRESS_HIGH                                0x00000010
+
+#define NVC0_SUBCHAN_QUERY_ADDRESS_LOW                         0x00000014
+
+#define NVC0_SUBCHAN_QUERY_SEQUENCE                            0x00000018
+
+#define NVC0_SUBCHAN_QUERY_GET                                 0x0000001c
+
+#define NVC0_SUBCHAN_REF_CNT                                   0x00000050
+
+#define NVC0_GRAPH                                             0x00000000
+
+#define NVC0_GRAPH_NOP                                         0x00000100
+
+#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH                         0x00000104
+
+#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW                          0x00000108
+
+#define NVC0_GRAPH_NOTIFY                                      0x0000010c
+#define NVC0_GRAPH_NOTIFY_WRITE                                        0x00000000
+#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN                     0x00000001
+
+#define NVC0_GRAPH_SERIALIZE                                   0x00000110
+
+#define NVC0_GRAPH_MACRO_UPLOAD_POS                            0x00000114
+
+#define NVC0_GRAPH_MACRO_UPLOAD_DATA                           0x00000118
+
+#define NVC0_GRAPH_MACRO_ID                                    0x0000011c
+
+#define NVC0_GRAPH_MACRO_POS                                   0x00000120
+
 
 #endif /* NV_OBJECT_XML */
diff --git a/src/gallium/drivers/nvc0/Makefile b/src/gallium/drivers/nvc0/Makefile
new file mode 100644 (file)
index 0000000..7aefd6f
--- /dev/null
@@ -0,0 +1,32 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nvc0
+
+C_SOURCES = \
+       nvc0_buffer.c \
+       nvc0_context.c \
+       nvc0_draw.c \
+       nvc0_formats.c \
+       nvc0_miptree.c \
+       nvc0_resource.c \
+       nvc0_screen.c \
+       nvc0_state.c \
+       nvc0_state_validate.c \
+       nvc0_surface.c \
+       nvc0_tex.c \
+       nvc0_transfer.c \
+       nvc0_vbo.c \
+       nvc0_program.c \
+       nvc0_shader_state.c \
+       nvc0_pc.c \
+       nvc0_pc_print.c \
+       nvc0_pc_emit.c \
+       nvc0_tgsi_to_nc.c \
+       nvc0_pc_optimize.c \
+       nvc0_pc_regalloc.c \
+       nvc0_push.c \
+       nvc0_push2.c \
+       nvc0_fence.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/nvc0/SConscript b/src/gallium/drivers/nvc0/SConscript
new file mode 100644 (file)
index 0000000..808d689
--- /dev/null
@@ -0,0 +1,34 @@
+Import('*')
+
+env = env.Clone()
+
+nvc0 = env.ConvenienceLibrary(
+    target = 'nvc0',
+    source = [
+        'nvc0_buffer.c',
+        'nvc0_context.c',
+        'nvc0_draw.c',
+        'nvc0_formats.c',
+        'nvc0_miptree.c',
+        'nvc0_resource.c',
+        'nvc0_screen.c',
+        'nvc0_state.c',
+        'nvc0_state_validate.c',
+        'nvc0_surface.c',
+        'nvc0_tex.c',
+        'nvc0_transfer.c',
+        'nvc0_vbo.c',
+        'nvc0_program.c',
+        'nvc0_shader_state.c',
+        'nvc0_pc.c',
+        'nvc0_pc_print.c',
+        'nvc0_pc_emit.c',
+        'nvc0_tgsi_to_nc.c',
+        'nvc0_pc_optimize.c',
+        'nvc0_pc_regalloc.c',
+        'nvc0_push.c',
+        'nvc0_push2.c',
+        'nvc0_fence.c',
+    ])
+
+Export('nvc0')
diff --git a/src/gallium/drivers/nvc0/nv50_defs.xml.h b/src/gallium/drivers/nvc0/nv50_defs.xml.h
new file mode 100644 (file)
index 0000000..1bf2f80
--- /dev/null
@@ -0,0 +1,142 @@
+#ifndef NV50_DEFS_XML
+#define NV50_DEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_defs.xml (   4482 bytes, from 2010-10-03 13:18:37)
+- copyright.xml (   6498 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT                 0x000000c0
+#define NV50_SURFACE_FORMAT_R32G32B32A32_SINT                  0x000000c1
+#define NV50_SURFACE_FORMAT_R32G32B32A32_UINT                  0x000000c2
+#define NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT                 0x000000c3
+#define NV50_SURFACE_FORMAT_R16G16B16A16_UNORM                 0x000000c6
+#define NV50_SURFACE_FORMAT_R16G16B16A16_SNORM                 0x000000c7
+#define NV50_SURFACE_FORMAT_R16G16B16A16_SINT                  0x000000c8
+#define NV50_SURFACE_FORMAT_R16G16B16A16_UINT                  0x000000c9
+#define NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT                 0x000000ca
+#define NV50_SURFACE_FORMAT_R32G32_FLOAT                       0x000000cb
+#define NV50_SURFACE_FORMAT_R32G32_SINT                                0x000000cc
+#define NV50_SURFACE_FORMAT_R32G32_UINT                                0x000000cd
+#define NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT                 0x000000ce
+#define NV50_SURFACE_FORMAT_A8R8G8B8_UNORM                     0x000000cf
+#define NV50_SURFACE_FORMAT_A8R8G8B8_SRGB                      0x000000d0
+#define NV50_SURFACE_FORMAT_A2B10G10R10_UNORM                  0x000000d1
+#define NV50_SURFACE_FORMAT_A2B10G10R10_UINT                   0x000000d2
+#define NV50_SURFACE_FORMAT_A8B8G8R8_UNORM                     0x000000d5
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SRGB                      0x000000d6
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SNORM                     0x000000d7
+#define NV50_SURFACE_FORMAT_A8B8G8R8_SINT                      0x000000d8
+#define NV50_SURFACE_FORMAT_A8B8G8R8_UINT                      0x000000d9
+#define NV50_SURFACE_FORMAT_R16G16_UNORM                       0x000000da
+#define NV50_SURFACE_FORMAT_R16G16_SNORM                       0x000000db
+#define NV50_SURFACE_FORMAT_R16G16_SINT                                0x000000dc
+#define NV50_SURFACE_FORMAT_R16G16_UINT                                0x000000dd
+#define NV50_SURFACE_FORMAT_R16G16_FLOAT                       0x000000de
+#define NV50_SURFACE_FORMAT_A2R10G10B10_UNORM                  0x000000df
+#define NV50_SURFACE_FORMAT_B10G11R11_FLOAT                    0x000000e0
+#define NV50_SURFACE_FORMAT_R32_FLOAT                          0x000000e5
+#define NV50_SURFACE_FORMAT_X8R8G8B8_UNORM                     0x000000e6
+#define NV50_SURFACE_FORMAT_X8R8G8B8_SRGB                      0x000000e7
+#define NV50_SURFACE_FORMAT_R5G6B5_UNORM                       0x000000e8
+#define NV50_SURFACE_FORMAT_A1R5G5B5_UNORM                     0x000000e9
+#define NV50_SURFACE_FORMAT_R8G8_UNORM                         0x000000ea
+#define NV50_SURFACE_FORMAT_R8G8_SNORM                         0x000000eb
+#define NV50_SURFACE_FORMAT_R8G8_SINT                          0x000000ec
+#define NV50_SURFACE_FORMAT_R8G8_UINT                          0x000000ed
+#define NV50_SURFACE_FORMAT_R16_UNORM                          0x000000ee
+#define NV50_SURFACE_FORMAT_R16_SNORM                          0x000000ef
+#define NV50_SURFACE_FORMAT_R16_SINT                           0x000000f0
+#define NV50_SURFACE_FORMAT_R16_UINT                           0x000000f1
+#define NV50_SURFACE_FORMAT_R16_FLOAT                          0x000000f2
+#define NV50_SURFACE_FORMAT_R8_UNORM                           0x000000f3
+#define NV50_SURFACE_FORMAT_R8_SNORM                           0x000000f4
+#define NV50_SURFACE_FORMAT_R8_SINT                            0x000000f5
+#define NV50_SURFACE_FORMAT_R8_UINT                            0x000000f6
+#define NV50_SURFACE_FORMAT_A8_UNORM                           0x000000f7
+#define NV50_SURFACE_FORMAT_X1R5G5B5_UNORM                     0x000000f8
+#define NV50_SURFACE_FORMAT_X8B8G8R8_UNORM                     0x000000f9
+#define NV50_SURFACE_FORMAT_X8B8G8R8_SRGB                      0x000000fa
+#define NV50_ZETA_FORMAT_Z32_FLOAT                             0x0000000a
+#define NV50_ZETA_FORMAT_Z16_UNORM                             0x00000013
+#define NV50_ZETA_FORMAT_Z24S8_UNORM                           0x00000014
+#define NV50_ZETA_FORMAT_X8Z24_UNORM                           0x00000015
+#define NV50_ZETA_FORMAT_S8Z24_UNORM                           0x00000016
+#define NV50_ZETA_FORMAT_UNK18                                 0x00000018
+#define NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM                 0x00000019
+#define NV50_ZETA_FORMAT_UNK1D                                 0x0000001d
+#define NV50_ZETA_FORMAT_UNK1E                                 0x0000001e
+#define NV50_ZETA_FORMAT_UNK1F                                 0x0000001f
+#define NV50_QUERY__SIZE                                       0x00000010
+#define NV50_QUERY_COUNTER                                     0x00000000
+
+#define NV50_QUERY_RES                                         0x00000004
+
+#define NV50_QUERY_TIME                                                0x00000008
+
+
+#endif /* NV50_DEFS_XML */
diff --git a/src/gallium/drivers/nvc0/nv50_texture.xml.h b/src/gallium/drivers/nvc0/nv50_texture.xml.h
new file mode 100644 (file)
index 0000000..9f83206
--- /dev/null
@@ -0,0 +1,259 @@
+#ifndef NV50_TEXTURE_XML
+#define NV50_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_texture.xml (   6871 bytes, from 2010-10-03 13:18:37)
+- copyright.xml    (   6498 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_TIC_MAP_ZERO                                      0x00000000
+#define NV50_TIC_MAP_C0                                                0x00000002
+#define NV50_TIC_MAP_C1                                                0x00000003
+#define NV50_TIC_MAP_C2                                                0x00000004
+#define NV50_TIC_MAP_C3                                                0x00000005
+#define NV50_TIC_MAP_ONE                                       0x00000007
+#define NV50_TIC_TYPE_SNORM                                    0x00000001
+#define NV50_TIC_TYPE_UNORM                                    0x00000002
+#define NV50_TIC_TYPE_SINT                                     0x00000003
+#define NV50_TIC_TYPE_UINT                                     0x00000004
+#define NV50_TIC_TYPE_SSCALED                                  0x00000005
+#define NV50_TIC_TYPE_USCALED                                  0x00000006
+#define NV50_TIC_TYPE_FLOAT                                    0x00000007
+#define NV50_TSC_WRAP_REPEAT                                   0x00000000
+#define NV50_TSC_WRAP_MIRROR_REPEAT                            0x00000001
+#define NV50_TSC_WRAP_CLAMP_TO_EDGE                            0x00000002
+#define NV50_TSC_WRAP_CLAMP_TO_BORDER                          0x00000003
+#define NV50_TSC_WRAP_CLAMP                                    0x00000004
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE                     0x00000005
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER                   0x00000006
+#define NV50_TSC_WRAP_MIRROR_CLAMP                             0x00000007
+#define NV50_TIC__SIZE                                         0x00000020
+#define NV50_TIC_0                                             0x00000000
+#define NV50_TIC_0_MAPA__MASK                                  0x38000000
+#define NV50_TIC_0_MAPA__SHIFT                                 27
+#define NV50_TIC_0_MAPB__MASK                                  0x07000000
+#define NV50_TIC_0_MAPB__SHIFT                                 24
+#define NV50_TIC_0_MAPG__MASK                                  0x00e00000
+#define NV50_TIC_0_MAPG__SHIFT                                 21
+#define NV50_TIC_0_MAPR__MASK                                  0x001c0000
+#define NV50_TIC_0_MAPR__SHIFT                                 18
+#define NV50_TIC_0_TYPE3__MASK                                 0x00038000
+#define NV50_TIC_0_TYPE3__SHIFT                                        15
+#define NV50_TIC_0_TYPE2__MASK                                 0x00007000
+#define NV50_TIC_0_TYPE2__SHIFT                                        12
+#define NV50_TIC_0_TYPE1__MASK                                 0x00000e00
+#define NV50_TIC_0_TYPE1__SHIFT                                        9
+#define NV50_TIC_0_TYPE0__MASK                                 0x000001c0
+#define NV50_TIC_0_TYPE0__SHIFT                                        6
+#define NV50_TIC_0_SWIZZLE__MASK                               0x3ffc0000
+#define NV50_TIC_0_FMT__MASK                                   0x0000003f
+#define NV50_TIC_0_FMT__SHIFT                                  0
+#define NV50_TIC_0_FMT_32_32_32_32                             0x00000001
+#define NV50_TIC_0_FMT_16_16_16_16                             0x00000003
+#define NV50_TIC_0_FMT_32_32                                   0x00000004
+#define NV50_TIC_0_FMT_32_8                                    0x00000005
+#define NV50_TIC_0_FMT_8_8_8_8                                 0x00000008
+#define NV50_TIC_0_FMT_2_10_10_10                              0x00000009
+#define NV50_TIC_0_FMT_16_16                                   0x0000000c
+#define NV50_TIC_0_FMT_8_24                                    0x0000000d
+#define NV50_TIC_0_FMT_24_8                                    0x0000000e
+#define NV50_TIC_0_FMT_32                                      0x0000000f
+#define NV50_TIC_0_FMT_4_4_4_4                                 0x00000012
+#define NV50_TIC_0_FMT_5_5_5_1                                 0x00000013
+#define NV50_TIC_0_FMT_1_5_5_5                                 0x00000014
+#define NV50_TIC_0_FMT_5_6_5                                   0x00000015
+#define NV50_TIC_0_FMT_6_5_5                                   0x00000016
+#define NV50_TIC_0_FMT_8_8                                     0x00000018
+#define NV50_TIC_0_FMT_16                                      0x0000001b
+#define NV50_TIC_0_FMT_8                                       0x0000001d
+#define NV50_TIC_0_FMT_4_4                                     0x0000001e
+#define NV50_TIC_0_FMT_UNK1F                                   0x0000001f
+#define NV50_TIC_0_FMT_E5_9_9_9                                        0x00000020
+#define NV50_TIC_0_FMT_10_11_11                                        0x00000021
+#define NV50_TIC_0_FMT_C1_C2_C1_C0                             0x00000022
+#define NV50_TIC_0_FMT_C2_C1_C0_C1                             0x00000023
+#define NV50_TIC_0_FMT_DXT1                                    0x00000024
+#define NV50_TIC_0_FMT_DXT3                                    0x00000025
+#define NV50_TIC_0_FMT_DXT5                                    0x00000026
+#define NV50_TIC_0_FMT_RGTC1                                   0x00000027
+#define NV50_TIC_0_FMT_RGTC2                                   0x00000028
+#define NV50_TIC_0_FMT_24_8_ZETA                               0x00000029
+#define NV50_TIC_0_FMT_8_24_ZETA                               0x0000002a
+#define NV50_TIC_0_FMT_UNK2C_ZETA                              0x0000002c
+#define NV50_TIC_0_FMT_UNK2D_ZETA                              0x0000002d
+#define NV50_TIC_0_FMT_UNK2E_ZETA                              0x0000002e
+#define NV50_TIC_0_FMT_32_ZETA                                 0x0000002f
+#define NV50_TIC_0_FMT_32_8_ZETA                               0x00000030
+#define NV50_TIC_0_FMT_16_ZETA                         0x0000003a
+
+#define NV50_TIC_1                                             0x00000004
+#define NV50_TIC_1_OFFSET_LOW__MASK                            0xffffffff
+#define NV50_TIC_1_OFFSET_LOW__SHIFT                           0
+
+#define NV50_TIC_2                                             0x00000008
+#define NV50_TIC_2_OFFSET_HIGH__MASK                           0x000000ff
+#define NV50_TIC_2_OFFSET_HIGH__SHIFT                          0
+#define NV50_TIC_2_COLORSPACE_SRGB                             0x00000400
+#define NV50_TIC_2_TARGET__MASK                                        0x0003c000
+#define NV50_TIC_2_TARGET__SHIFT                               14
+#define NV50_TIC_2_TARGET_1D                                   0x00000000
+#define NV50_TIC_2_TARGET_2D                                   0x00004000
+#define NV50_TIC_2_TARGET_3D                                   0x00008000
+#define NV50_TIC_2_TARGET_CUBE                                 0x0000c000
+#define NV50_TIC_2_TARGET_1D_ARRAY                             0x00010000
+#define NV50_TIC_2_TARGET_2D_ARRAY                             0x00014000
+#define NV50_TIC_2_TARGET_BUFFER                               0x00018000
+#define NV50_TIC_2_TARGET_RECT                                 0x0001c000
+#define NV50_TIC_2_TARGET_CUBE_ARRAY                           0x00020000
+#define NV50_TIC_2_TILE_MODE_LINEAR                            0x00040000
+#define NV50_TIC_2_TILE_MODE_Y__MASK                           0x01c00000
+#define NV50_TIC_2_TILE_MODE_Y__SHIFT                          22
+#define NV50_TIC_2_TILE_MODE_Z__MASK                           0x0e000000
+#define NV50_TIC_2_TILE_MODE_Z__SHIFT                          25
+#define NV50_TIC_2_2D_UNK0258__MASK                            0x30000000
+#define NV50_TIC_2_2D_UNK0258__SHIFT                           28
+#define NV50_TIC_2_NORMALIZED_COORDS                           0x80000000
+
+#define NV50_TIC_3                                             0x0000000c
+#define NV50_TIC_3_PITCH__MASK                                 0xffffffff
+#define NV50_TIC_3_PITCH__SHIFT                                        0
+
+#define NV50_TIC_4                                             0x00000010
+#define NV50_TIC_4_WIDTH__MASK                                 0xffffffff
+#define NV50_TIC_4_WIDTH__SHIFT                                        0
+
+#define NV50_TIC_5                                             0x00000014
+#define NV50_TIC_5_LAST_LEVEL__MASK                            0xf0000000
+#define NV50_TIC_5_LAST_LEVEL__SHIFT                           28
+#define NV50_TIC_5_DEPTH__MASK                                 0x0fff0000
+#define NV50_TIC_5_DEPTH__SHIFT                                        16
+#define NV50_TIC_5_HEIGHT__MASK                                        0x0000ffff
+#define NV50_TIC_5_HEIGHT__SHIFT                               0
+
+#define NV50_TIC_7                                             0x0000001c
+#define NV50_TIC_7_BASE_LEVEL__MASK                            0x0000000f
+#define NV50_TIC_7_BASE_LEVEL__SHIFT                           0
+#define NV50_TIC_7_MAX_LEVEL__MASK                             0x000000f0
+#define NV50_TIC_7_MAX_LEVEL__SHIFT                            4
+
+#define NV50_TSC__SIZE                                         0x00000020
+#define NV50_TSC_0                                             0x00000000
+#define NV50_TSC_0_WRAPS__MASK                                 0x00000007
+#define NV50_TSC_0_WRAPS__SHIFT                                        0
+#define NV50_TSC_0_WRAPT__MASK                                 0x00000038
+#define NV50_TSC_0_WRAPT__SHIFT                                        3
+#define NV50_TSC_0_WRAPR__MASK                                 0x000001c0
+#define NV50_TSC_0_WRAPR__SHIFT                                        6
+#define NV50_TSC_0_SHADOW_COMPARE_ENABLE                       0x00000200
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK                   0x00001c00
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT                  10
+#define NV50_TSC_0_ANISOTROPY_MASK__MASK                       0x00700000
+#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT                      20
+
+#define NV50_TSC_1                                             0x00000004
+#define NV50_TSC_1_UNKN_ANISO_15                               0x10000000
+#define NV50_TSC_1_UNKN_ANISO_35                               0x18000000
+#define NV50_TSC_1_MAGF__MASK                                  0x00000003
+#define NV50_TSC_1_MAGF__SHIFT                                 0
+#define NV50_TSC_1_MAGF_NEAREST                                        0x00000001
+#define NV50_TSC_1_MAGF_LINEAR                                 0x00000002
+#define NV50_TSC_1_MINF__MASK                                  0x00000030
+#define NV50_TSC_1_MINF__SHIFT                                 4
+#define NV50_TSC_1_MINF_NEAREST                                        0x00000010
+#define NV50_TSC_1_MINF_LINEAR                                 0x00000020
+#define NV50_TSC_1_MIPF__MASK                                  0x000000c0
+#define NV50_TSC_1_MIPF__SHIFT                                 6
+#define NV50_TSC_1_MIPF_NONE                                   0x00000040
+#define NV50_TSC_1_MIPF_NEAREST                                        0x00000080
+#define NV50_TSC_1_MIPF_LINEAR                                 0x000000c0
+#define NV50_TSC_1_LOD_BIAS__MASK                              0x01fff000
+#define NV50_TSC_1_LOD_BIAS__SHIFT                             12
+
+#define NV50_TSC_2                                             0x00000008
+#define NV50_TSC_2_MIN_LOD__MASK                               0x00000f00
+#define NV50_TSC_2_MIN_LOD__SHIFT                              8
+#define NV50_TSC_2_MAX_LOD__MASK                               0x00f00000
+#define NV50_TSC_2_MAX_LOD__SHIFT                              20
+
+#define NV50_TSC_4                                             0x00000010
+#define NV50_TSC_4_BORDER_COLOR_RED__MASK                      0xffffffff
+#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT                     0
+
+#define NV50_TSC_5                                             0x00000014
+#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK                    0xffffffff
+#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT                   0
+
+#define NV50_TSC_6                                             0x00000018
+#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK                     0xffffffff
+#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT                    0
+
+#define NV50_TSC_7                                             0x0000001c
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK                    0xffffffff
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT                   0
+
+
+#endif /* NV50_TEXTURE_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nvc0/nvc0_2d.xml.h
new file mode 100644 (file)
index 0000000..aebcd51
--- /dev/null
@@ -0,0 +1,380 @@
+#ifndef NVC0_2D_XML
+#define NVC0_2D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_2d.xml    (   9454 bytes, from 2010-10-16 16:03:11)
+- copyright.xml  (   6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml  (  11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml (   2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml    (   4437 bytes, from 2010-07-06 07:43:58)
+- nv50_defs.xml  (   4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_2D_DST_FORMAT                                     0x00000200
+
+#define NVC0_2D_DST_LINEAR                                     0x00000204
+
+#define NVC0_2D_DST_TILE_MODE                                  0x00000208
+
+#define NVC0_2D_DST_DEPTH                                      0x0000020c
+
+#define NVC0_2D_DST_LAYER                                      0x00000210
+
+#define NVC0_2D_DST_PITCH                                      0x00000214
+
+#define NVC0_2D_DST_WIDTH                                      0x00000218
+
+#define NVC0_2D_DST_HEIGHT                                     0x0000021c
+
+#define NVC0_2D_DST_ADDRESS_HIGH                               0x00000220
+
+#define NVC0_2D_DST_ADDRESS_LOW                                        0x00000224
+
+#define NVC0_2D_UNK228                                         0x00000228
+
+#define NVC0_2D_SRC_FORMAT                                     0x00000230
+
+#define NVC0_2D_SRC_LINEAR                                     0x00000234
+
+#define NVC0_2D_SRC_TILE_MODE                                  0x00000238
+
+#define NVC0_2D_SRC_DEPTH                                      0x0000023c
+
+#define NVC0_2D_SRC_LAYER                                      0x00000240
+
+#define NVC0_2D_SRC_PITCH                                      0x00000244
+#define NVC0_2D_SRC_PITCH__MAX                                 0x00040000
+
+#define NVC0_2D_SRC_WIDTH                                      0x00000248
+#define NVC0_2D_SRC_WIDTH__MAX                                 0x00010000
+
+#define NVC0_2D_SRC_HEIGHT                                     0x0000024c
+#define NVC0_2D_SRC_HEIGHT__MAX                                        0x00010000
+
+#define NVC0_2D_SRC_ADDRESS_HIGH                               0x00000250
+
+#define NVC0_2D_SRC_ADDRESS_LOW                                        0x00000254
+
+#define NVC0_2D_UNK258                                         0x00000258
+
+#define NVC0_2D_UNK260                                         0x00000260
+
+#define NVC0_2D_COND_ADDRESS_HIGH                              0x00000264
+
+#define NVC0_2D_COND_ADDRESS_LOW                               0x00000268
+
+#define NVC0_2D_COND_MODE                                      0x0000026c
+#define NVC0_2D_COND_MODE_NEVER                                        0x00000000
+#define NVC0_2D_COND_MODE_ALWAYS                               0x00000001
+#define NVC0_2D_COND_MODE_RES_NON_ZERO                         0x00000002
+#define NVC0_2D_COND_MODE_EQUAL                                        0x00000003
+#define NVC0_2D_COND_MODE_NOT_EQUAL                            0x00000004
+
+#define NVC0_2D_CLIP_X                                         0x00000280
+
+#define NVC0_2D_CLIP_Y                                         0x00000284
+
+#define NVC0_2D_CLIP_W                                         0x00000288
+
+#define NVC0_2D_CLIP_H                                         0x0000028c
+
+#define NVC0_2D_CLIP_ENABLE                                    0x00000290
+
+#define NVC0_2D_COLOR_KEY_FORMAT                               0x00000294
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP                         0x00000000
+#define NVC0_2D_COLOR_KEY_FORMAT_15BPP                         0x00000001
+#define NVC0_2D_COLOR_KEY_FORMAT_24BPP                         0x00000002
+#define NVC0_2D_COLOR_KEY_FORMAT_30BPP                         0x00000003
+#define NVC0_2D_COLOR_KEY_FORMAT_8BPP                          0x00000004
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2                                0x00000005
+#define NVC0_2D_COLOR_KEY_FORMAT_32BPP                         0x00000006
+
+#define NVC0_2D_COLOR_KEY                                      0x00000298
+
+#define NVC0_2D_COLOR_KEY_ENABLE                               0x0000029c
+
+#define NVC0_2D_ROP                                            0x000002a0
+
+#define NVC0_2D_BETA1                                          0x000002a4
+
+#define NVC0_2D_BETA4                                          0x000002a8
+
+#define NVC0_2D_OPERATION                                      0x000002ac
+#define NVC0_2D_OPERATION_SRCCOPY_AND                          0x00000000
+#define NVC0_2D_OPERATION_ROP_AND                              0x00000001
+#define NVC0_2D_OPERATION_BLEND_AND                            0x00000002
+#define NVC0_2D_OPERATION_SRCCOPY                              0x00000003
+#define NVC0_2D_OPERATION_UNK4                                 0x00000004
+#define NVC0_2D_OPERATION_SRCCOPY_PREMULT                      0x00000005
+#define NVC0_2D_OPERATION_BLEND_PREMULT                                0x00000006
+
+#define NVC0_2D_UNK2B0                                         0x000002b0
+#define NVC0_2D_UNK2B0_UNK0__MASK                              0x0000003f
+#define NVC0_2D_UNK2B0_UNK0__SHIFT                             0
+#define NVC0_2D_UNK2B0_UNK1__MASK                              0x00003f00
+#define NVC0_2D_UNK2B0_UNK1__SHIFT                             8
+
+#define NVC0_2D_PATTERN_SELECT                                 0x000002b4
+#define NVC0_2D_PATTERN_SELECT_MONO_8X8                                0x00000000
+#define NVC0_2D_PATTERN_SELECT_MONO_64X1                       0x00000001
+#define NVC0_2D_PATTERN_SELECT_MONO_1X64                       0x00000002
+#define NVC0_2D_PATTERN_SELECT_COLOR                           0x00000003
+
+#define NVC0_2D_PATTERN_COLOR_FORMAT                           0x000002e8
+#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP                     0x00000000
+#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP                     0x00000001
+#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP                     0x00000002
+#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP                      0x00000003
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4                      0x00000004
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5                      0x00000005
+
+#define NVC0_2D_PATTERN_MONO_FORMAT                            0x000002ec
+#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6                       0x00000000
+#define NVC0_2D_PATTERN_MONO_FORMAT_LE                         0x00000001
+
+#define NVC0_2D_PATTERN_COLOR(i0)                             (0x000002f0 + 0x4*(i0))
+#define NVC0_2D_PATTERN_COLOR__ESIZE                           0x00000004
+#define NVC0_2D_PATTERN_COLOR__LEN                             0x00000002
+
+#define NVC0_2D_PATTERN_BITMAP(i0)                            (0x000002f8 + 0x4*(i0))
+#define NVC0_2D_PATTERN_BITMAP__ESIZE                          0x00000004
+#define NVC0_2D_PATTERN_BITMAP__LEN                            0x00000002
+
+#define NVC0_2D_PATTERN_X8R8G8B8(i0)                          (0x00000300 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE                                0x00000004
+#define NVC0_2D_PATTERN_X8R8G8B8__LEN                          0x00000040
+#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK                       0x000000ff
+#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT                      0
+#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK                       0x0000ff00
+#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT                      8
+#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK                       0x00ff0000
+#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT                      16
+
+#define NVC0_2D_PATTERN_R5G6B5(i0)                            (0x00000400 + 0x4*(i0))
+#define NVC0_2D_PATTERN_R5G6B5__ESIZE                          0x00000004
+#define NVC0_2D_PATTERN_R5G6B5__LEN                            0x00000020
+#define NVC0_2D_PATTERN_R5G6B5_B0__MASK                                0x0000001f
+#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT                       0
+#define NVC0_2D_PATTERN_R5G6B5_G0__MASK                                0x000007e0
+#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT                       5
+#define NVC0_2D_PATTERN_R5G6B5_R0__MASK                                0x0000f800
+#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT                       11
+#define NVC0_2D_PATTERN_R5G6B5_B1__MASK                                0x001f0000
+#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT                       16
+#define NVC0_2D_PATTERN_R5G6B5_G1__MASK                                0x07e00000
+#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT                       21
+#define NVC0_2D_PATTERN_R5G6B5_R1__MASK                                0xf8000000
+#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT                       27
+
+#define NVC0_2D_PATTERN_X1R5G5B5(i0)                          (0x00000480 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE                                0x00000004
+#define NVC0_2D_PATTERN_X1R5G5B5__LEN                          0x00000020
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK                      0x0000001f
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT                     0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK                      0x000003e0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT                     5
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK                      0x00007c00
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT                     10
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK                      0x001f0000
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT                     16
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK                      0x03e00000
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT                     21
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK                      0x7c000000
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT                     26
+
+#define NVC0_2D_PATTERN_Y8(i0)                                (0x00000500 + 0x4*(i0))
+#define NVC0_2D_PATTERN_Y8__ESIZE                              0x00000004
+#define NVC0_2D_PATTERN_Y8__LEN                                        0x00000010
+#define NVC0_2D_PATTERN_Y8_Y0__MASK                            0x000000ff
+#define NVC0_2D_PATTERN_Y8_Y0__SHIFT                           0
+#define NVC0_2D_PATTERN_Y8_Y1__MASK                            0x0000ff00
+#define NVC0_2D_PATTERN_Y8_Y1__SHIFT                           8
+#define NVC0_2D_PATTERN_Y8_Y2__MASK                            0x00ff0000
+#define NVC0_2D_PATTERN_Y8_Y2__SHIFT                           16
+#define NVC0_2D_PATTERN_Y8_Y3__MASK                            0xff000000
+#define NVC0_2D_PATTERN_Y8_Y3__SHIFT                           24
+
+#define NVC0_2D_DRAW_SHAPE                                     0x00000580
+#define NVC0_2D_DRAW_SHAPE_POINTS                              0x00000000
+#define NVC0_2D_DRAW_SHAPE_LINES                               0x00000001
+#define NVC0_2D_DRAW_SHAPE_LINE_STRIP                          0x00000002
+#define NVC0_2D_DRAW_SHAPE_TRIANGLES                           0x00000003
+#define NVC0_2D_DRAW_SHAPE_RECTANGLES                          0x00000004
+
+#define NVC0_2D_DRAW_COLOR_FORMAT                              0x00000584
+
+#define NVC0_2D_DRAW_COLOR                                     0x00000588
+
+#define NVC0_2D_UNK58C                                         0x0000058c
+#define NVC0_2D_UNK58C_0                                       0x00000001
+#define NVC0_2D_UNK58C_1                                       0x00000010
+#define NVC0_2D_UNK58C_2                                       0x00000100
+#define NVC0_2D_UNK58C_3                                       0x00001000
+
+#define NVC0_2D_DRAW_POINT16                                   0x000005e0
+#define NVC0_2D_DRAW_POINT16_X__MASK                           0x0000ffff
+#define NVC0_2D_DRAW_POINT16_X__SHIFT                          0
+#define NVC0_2D_DRAW_POINT16_Y__MASK                           0xffff0000
+#define NVC0_2D_DRAW_POINT16_Y__SHIFT                          16
+
+#define NVC0_2D_DRAW_POINT32_X(i0)                            (0x00000600 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_X__ESIZE                          0x00000008
+#define NVC0_2D_DRAW_POINT32_X__LEN                            0x00000040
+
+#define NVC0_2D_DRAW_POINT32_Y(i0)                            (0x00000604 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_Y__ESIZE                          0x00000008
+#define NVC0_2D_DRAW_POINT32_Y__LEN                            0x00000040
+
+#define NVC0_2D_SIFC_BITMAP_ENABLE                             0x00000800
+
+#define NVC0_2D_SIFC_FORMAT                                    0x00000804
+
+#define NVC0_2D_SIFC_BITMAP_FORMAT                             0x00000808
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I1                          0x00000000
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I4                          0x00000001
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I8                          0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_LSB_FIRST                          0x0000080c
+
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE                     0x00000810
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED              0x00000000
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE          0x00000001
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD          0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0                         0x00000814
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1                         0x00000818
+
+#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE                  0x0000081c
+
+#define NVC0_2D_SIFC_WIDTH                                     0x00000838
+
+#define NVC0_2D_SIFC_HEIGHT                                    0x0000083c
+
+#define NVC0_2D_SIFC_DX_DU_FRACT                               0x00000840
+
+#define NVC0_2D_SIFC_DX_DU_INT                                 0x00000844
+
+#define NVC0_2D_SIFC_DY_DV_FRACT                               0x00000848
+
+#define NVC0_2D_SIFC_DY_DV_INT                                 0x0000084c
+
+#define NVC0_2D_SIFC_DST_X_FRACT                               0x00000850
+
+#define NVC0_2D_SIFC_DST_X_INT                                 0x00000854
+
+#define NVC0_2D_SIFC_DST_Y_FRACT                               0x00000858
+
+#define NVC0_2D_SIFC_DST_Y_INT                                 0x0000085c
+
+#define NVC0_2D_SIFC_DATA                                      0x00000860
+
+#define NVC0_2D_UNK0870                                                0x00000870
+
+#define NVC0_2D_UNK0880                                                0x00000880
+
+#define NVC0_2D_UNK0884                                                0x00000884
+
+#define NVC0_2D_UNK0888                                                0x00000888
+
+#define NVC0_2D_BLIT_CONTROL                                   0x0000088c
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK                      0x00000001
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT                     0
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER                     0x00000000
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER                     0x00000001
+#define NVC0_2D_BLIT_CONTROL_FILTER__MASK                      0x00000010
+#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT                     4
+#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE               0x00000000
+#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR                   0x00000010
+
+#define NVC0_2D_BLIT_DST_X                                     0x000008b0
+
+#define NVC0_2D_BLIT_DST_Y                                     0x000008b4
+
+#define NVC0_2D_BLIT_DST_W                                     0x000008b8
+
+#define NVC0_2D_BLIT_DST_H                                     0x000008bc
+
+#define NVC0_2D_BLIT_DU_DX_FRACT                               0x000008c0
+
+#define NVC0_2D_BLIT_DU_DX_INT                                 0x000008c4
+
+#define NVC0_2D_BLIT_DV_DY_FRACT                               0x000008c8
+
+#define NVC0_2D_BLIT_DV_DY_INT                                 0x000008cc
+
+#define NVC0_2D_BLIT_SRC_X_FRACT                               0x000008d0
+
+#define NVC0_2D_BLIT_SRC_X_INT                                 0x000008d4
+
+#define NVC0_2D_BLIT_SRC_Y_FRACT                               0x000008d8
+
+#define NVC0_2D_BLIT_SRC_Y_INT                                 0x000008dc
+
+
+#endif /* NVC0_2D_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
new file mode 100644 (file)
index 0000000..b727a8c
--- /dev/null
@@ -0,0 +1,1067 @@
+#ifndef NVC0_3D_XML
+#define NVC0_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml    (  26726 bytes, from 2010-10-22 00:29:01)
+- copyright.xml  (   6498 bytes, from 2010-09-30 18:32:24)
+- nv_defs.xml    (   4437 bytes, from 2010-07-24 13:13:40)
+- nv_3ddefs.xml  (  16394 bytes, from 2010-10-11 14:37:46)
+- nv_object.xml  (  11357 bytes, from 2010-10-19 20:33:50)
+- nvchipsets.xml (   2907 bytes, from 2010-10-12 17:28:45)
+- nv50_defs.xml  (   4482 bytes, from 2010-10-03 10:27:25)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_3D_NOTIFY_ADDRESS_HIGH                            0x00000104
+#define NVC0_3D_NOTIFY_ADDRESS_LOW                             0x00000108
+#define NVC0_3D_NOTIFY                                         0x0000010c
+
+#define NVC0_3D_SERIALIZE                                      0x00000110
+
+#define NVC0_3D_EARLY_FRAGMENT_TESTS                           0x00000210
+/* TESS_MODE bitfield: PRIM = bits 0-3, SPACING = bits 4-7, CW = bit 8, CONNECTED = bit 9; enum values are pre-shifted. */
+#define NVC0_3D_TESS_MODE                                      0x00000320
+#define NVC0_3D_TESS_MODE_PRIM__MASK                           0x0000000f
+#define NVC0_3D_TESS_MODE_PRIM__SHIFT                          0
+#define NVC0_3D_TESS_MODE_PRIM_ISOLINES                                0x00000000
+#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES                       0x00000001
+#define NVC0_3D_TESS_MODE_PRIM_QUADS                           0x00000002
+#define NVC0_3D_TESS_MODE_SPACING__MASK                                0x000000f0
+#define NVC0_3D_TESS_MODE_SPACING__SHIFT                       4
+#define NVC0_3D_TESS_MODE_SPACING_EQUAL                                0x00000000
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD               0x00000010
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN              0x00000020
+#define NVC0_3D_TESS_MODE_CW                                   0x00000100
+#define NVC0_3D_TESS_MODE_CONNECTED                            0x00000200
+/* Register arrays: NAME(i0) = base + __ESIZE * i0; __LEN is presumably the element count (4 outer, 2 inner) -- confirm. */
+#define NVC0_3D_TESS_LEVEL_OUTER(i0)                          (0x00000324 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE                                0x00000004
+#define NVC0_3D_TESS_LEVEL_OUTER__LEN                          0x00000004
+
+#define NVC0_3D_TESS_LEVEL_INNER(i0)                          (0x00000334 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_INNER__ESIZE                                0x00000004
+#define NVC0_3D_TESS_LEVEL_INNER__LEN                          0x00000002
+
+#define NVC0_3D_RASTERIZE_ENABLE                               0x0000037c
+/* TFB register block: __LEN = 4 entries of 0x20 bytes (__ESIZE) each, starting at 0x380. */
+#define NVC0_3D_TFB(i0)                                               (0x00000380 + 0x20*(i0))
+#define NVC0_3D_TFB__ESIZE                                     0x00000020
+#define NVC0_3D_TFB__LEN                                       0x00000004
+/* Per-entry registers at offsets 0x00..0x10; TFB_BUFFER_ENABLE(i0) expands to the same address as TFB(i0). */
+#define NVC0_3D_TFB_BUFFER_ENABLE(i0)                         (0x00000380 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_HIGH(i0)                          (0x00000384 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_LOW(i0)                           (0x00000388 + 0x20*(i0))
+
+#define NVC0_3D_TFB_BUFFER_SIZE(i0)                           (0x0000038c + 0x20*(i0))
+
+#define NVC0_3D_TFB_PRIMITIVE_ID(i0)                          (0x00000390 + 0x20*(i0))
+/* The three macros below start at 0x700 and step by 0x10 per index, not the 0x20 used by the block above. */
+#define NVC0_3D_TFB_UNK0700(i0)                                       (0x00000700 + 0x10*(i0))
+
+#define NVC0_3D_TFB_VARYING_COUNT(i0)                         (0x00000704 + 0x10*(i0))
+
+#define NVC0_3D_TFB_BUFFER_STRIDE(i0)                         (0x00000708 + 0x10*(i0))
+
+#define NVC0_3D_TFB_ENABLE                                     0x00000744
+
+#define NVC0_3D_LOCAL_BASE                                     0x0000077c
+
+#define NVC0_3D_LOCAL_ADDRESS_HIGH                             0x00000790
+
+#define NVC0_3D_LOCAL_ADDRESS_LOW                              0x00000794
+
+#define NVC0_3D_LOCAL_SIZE_HIGH                                        0x00000798
+
+#define NVC0_3D_LOCAL_SIZE_LOW                                 0x0000079c
+/* RT register block: __LEN = 8 entries of 0x20 bytes each from 0x800; RT_ADDRESS_HIGH(i0) expands to the same address as RT(i0). */
+#define NVC0_3D_RT(i0)                                        (0x00000800 + 0x20*(i0))
+#define NVC0_3D_RT__ESIZE                                      0x00000020
+#define NVC0_3D_RT__LEN                                                0x00000008
+
+#define NVC0_3D_RT_ADDRESS_HIGH(i0)                           (0x00000800 + 0x20*(i0))
+
+#define NVC0_3D_RT_ADDRESS_LOW(i0)                            (0x00000804 + 0x20*(i0))
+
+#define NVC0_3D_RT_HORIZ(i0)                                  (0x00000808 + 0x20*(i0))
+
+#define NVC0_3D_RT_VERT(i0)                                   (0x0000080c + 0x20*(i0))
+
+#define NVC0_3D_RT_FORMAT(i0)                                 (0x00000810 + 0x20*(i0))
+/* RT_TILE_MODE bitfield: UNK0 = bit 0, Y = bits 4-6, Z = bits 8-10. */
+#define NVC0_3D_RT_TILE_MODE(i0)                              (0x00000814 + 0x20*(i0))
+#define NVC0_3D_RT_TILE_MODE_UNK0                              0x00000001
+#define NVC0_3D_RT_TILE_MODE_Y__MASK                           0x00000070
+#define NVC0_3D_RT_TILE_MODE_Y__SHIFT                          4
+#define NVC0_3D_RT_TILE_MODE_Z__MASK                           0x00000700
+#define NVC0_3D_RT_TILE_MODE_Z__SHIFT                          8
+/* RT_ARRAY_MODE bitfield: LAYERS = bits 0-15, VOLUME = bit 16. */
+#define NVC0_3D_RT_ARRAY_MODE(i0)                             (0x00000818 + 0x20*(i0))
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK                     0x0000ffff
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT                    0
+#define NVC0_3D_RT_ARRAY_MODE_VOLUME                           0x00010000
+
+#define NVC0_3D_RT_LAYER_STRIDE(i0)                           (0x0000081c + 0x20*(i0))
+/* Viewport transform: six arrays interleaved at 0xa00..0xa14, each with a 0x20 stride and __LEN = 0x10. */
+#define NVC0_3D_VIEWPORT_SCALE_X(i0)                          (0x00000a00 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE                                0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_X__LEN                          0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Y(i0)                          (0x00000a04 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE                                0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Y__LEN                          0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Z(i0)                          (0x00000a08 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE                                0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Z__LEN                          0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0)                      (0x00000a0c + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE                    0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN                      0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0)                      (0x00000a10 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE                    0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN                      0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0)                      (0x00000a14 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE                    0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN                      0x00000010
+/* Viewport rectangle and depth range: four arrays interleaved at 0xc00..0xc0c with a 0x10 stride and __LEN = 0x10. */
+#define NVC0_3D_VIEWPORT_HORIZ(i0)                            (0x00000c00 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_HORIZ__ESIZE                          0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ__LEN                            0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ_X__MASK                         0x0000ffff
+#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT                                0
+#define NVC0_3D_VIEWPORT_HORIZ_W__MASK                         0xffff0000
+#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT                                16
+/* VERT packs Y and H the same way HORIZ packs X and W: first field in bits 0-15, second in bits 16-31. */
+#define NVC0_3D_VIEWPORT_VERT(i0)                             (0x00000c04 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_VERT__ESIZE                           0x00000010
+#define NVC0_3D_VIEWPORT_VERT__LEN                             0x00000010
+#define NVC0_3D_VIEWPORT_VERT_Y__MASK                          0x0000ffff
+#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT                         0
+#define NVC0_3D_VIEWPORT_VERT_H__MASK                          0xffff0000
+#define NVC0_3D_VIEWPORT_VERT_H__SHIFT                         16
+
+#define NVC0_3D_DEPTH_RANGE_NEAR(i0)                          (0x00000c08 + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE                                0x00000010
+#define NVC0_3D_DEPTH_RANGE_NEAR__LEN                          0x00000010
+
+#define NVC0_3D_DEPTH_RANGE_FAR(i0)                           (0x00000c0c + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE                         0x00000010
+#define NVC0_3D_DEPTH_RANGE_FAR__LEN                           0x00000010
+/* Viewport clip rectangles: HORIZ/VERT interleaved at 0xd00/0xd04 with a 0x8 stride; MIN = bits 0-15, MAX = bits 16-31. */
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ(i0)                               (0x00000d00 + 0x8*(i0))
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ__ESIZE                     0x00000008
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ__LEN                       0x00000008
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__MASK                  0x0000ffff
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MIN__SHIFT                 0
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__MASK                  0xffff0000
+#define NVC0_3D_VIEWPORT_CLIP_HORIZ_MAX__SHIFT                 16
+/* NOTE(review): __LEN is 8 here but 0x10 for the other VIEWPORT_* arrays; 8 entries end exactly where CLIPID_REGION starts (0xd40). */
+#define NVC0_3D_VIEWPORT_CLIP_VERT(i0)                        (0x00000d04 + 0x8*(i0))
+#define NVC0_3D_VIEWPORT_CLIP_VERT__ESIZE                      0x00000008
+#define NVC0_3D_VIEWPORT_CLIP_VERT__LEN                                0x00000008
+#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__MASK                   0x0000ffff
+#define NVC0_3D_VIEWPORT_CLIP_VERT_MIN__SHIFT                  0
+#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__MASK                   0xffff0000
+#define NVC0_3D_VIEWPORT_CLIP_VERT_MAX__SHIFT                  16
+/* CLIPID regions: __LEN = 4 HORIZ/VERT pairs from 0xd40 with a 0x8 stride; X and Y in bits 0-15, W and H in bits 16-31. */
+#define NVC0_3D_CLIPID_REGION_HORIZ(i0)                               (0x00000d40 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE                     0x00000008
+#define NVC0_3D_CLIPID_REGION_HORIZ__LEN                       0x00000004
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK                    0x0000ffff
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT                   0
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK                    0xffff0000
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT                   16
+
+#define NVC0_3D_CLIPID_REGION_VERT(i0)                        (0x00000d44 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_VERT__ESIZE                      0x00000008
+#define NVC0_3D_CLIPID_REGION_VERT__LEN                                0x00000004
+#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK                     0x0000ffff
+#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT                    0
+#define NVC0_3D_CLIPID_REGION_VERT_H__MASK                     0xffff0000
+#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT                    16
+
+#define NVC0_3D_VERTEX_BUFFER_FIRST                            0x00000d74
+
+#define NVC0_3D_VERTEX_BUFFER_COUNT                            0x00000d78
+
+#define NVC0_3D_CLEAR_COLOR(i0)                                       (0x00000d80 + 0x4*(i0))
+#define NVC0_3D_CLEAR_COLOR__ESIZE                             0x00000004
+#define NVC0_3D_CLEAR_COLOR__LEN                               0x00000004
+
+#define NVC0_3D_CLEAR_DEPTH                                    0x00000d90
+
+#define NVC0_3D_CLEAR_STENCIL                                  0x00000da0
+
+#define NVC0_3D_POLYGON_SMOOTH_ENABLE                          0x00000db4
+
+#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE                    0x00000dc0
+
+#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE                     0x00000dc4
+
+#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE                     0x00000dc8
+
+#define NVC0_3D_PATCH_VERTICES                                 0x00000dcc
+
+#define NVC0_3D_WINDOW_OFFSET_X                                        0x00000df8
+
+#define NVC0_3D_WINDOW_OFFSET_Y                                        0x00000dfc
+/* Scissor state: ENABLE/HORIZ/VERT arrays interleaved at 0xe00/0xe04/0xe08 with a 0x10 stride and __LEN = 0x10. */
+#define NVC0_3D_SCISSOR_ENABLE(i0)                            (0x00000e00 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_ENABLE__ESIZE                          0x00000010
+#define NVC0_3D_SCISSOR_ENABLE__LEN                            0x00000010
+/* HORIZ and VERT each pack MIN (bits 0-15) and MAX (bits 16-31). */
+#define NVC0_3D_SCISSOR_HORIZ(i0)                             (0x00000e04 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_HORIZ__ESIZE                           0x00000010
+#define NVC0_3D_SCISSOR_HORIZ__LEN                             0x00000010
+#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK                                0x0000ffff
+#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT                       0
+#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK                                0xffff0000
+#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT                       16
+
+#define NVC0_3D_SCISSOR_VERT(i0)                              (0x00000e08 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_VERT__ESIZE                            0x00000010
+#define NVC0_3D_SCISSOR_VERT__LEN                              0x00000010
+#define NVC0_3D_SCISSOR_VERT_MIN__MASK                         0x0000ffff
+#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT                                0
+#define NVC0_3D_SCISSOR_VERT_MAX__MASK                         0xffff0000
+#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT                                16
+
+#define NVC0_3D_STENCIL_BACK_FUNC_REF                          0x00000f54
+
+#define NVC0_3D_STENCIL_BACK_MASK                              0x00000f58
+
+#define NVC0_3D_STENCIL_BACK_FUNC_MASK                         0x00000f5c
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH                     0x00000f84
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW                      0x00000f88
+
+#define NVC0_3D_DEPTH_BOUNDS(i0)                              (0x00000f9c + 0x4*(i0))
+#define NVC0_3D_DEPTH_BOUNDS__ESIZE                            0x00000004
+#define NVC0_3D_DEPTH_BOUNDS__LEN                              0x00000002
+
+#define NVC0_3D_MSAA_MASK(i0)                                 (0x00000fbc + 0x4*(i0))
+#define NVC0_3D_MSAA_MASK__ESIZE                               0x00000004
+#define NVC0_3D_MSAA_MASK__LEN                                 0x00000004
+
+#define NVC0_3D_CLIPID_ADDRESS_HIGH                            0x00000fcc
+
+#define NVC0_3D_CLIPID_ADDRESS_LOW                             0x00000fd0
+
+#define NVC0_3D_ZETA_ADDRESS_HIGH                              0x00000fe0
+
+#define NVC0_3D_ZETA_ADDRESS_LOW                               0x00000fe4
+
+#define NVC0_3D_ZETA_FORMAT                                    0x00000fe8
+
+#define NVC0_3D_ZETA_TILE_MODE                                 0x00000fec
+
+#define NVC0_3D_ZETA_LAYER_STRIDE                              0x00000ff0
+
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ                           0x00000ff4
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK                   0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT                  16
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK                   0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT                  0
+
+#define NVC0_3D_SCREEN_SCISSOR_VERT                            0x00000ff8
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK                    0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT                   16
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK                    0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT                   0
+/* VTX_ATTR_DEFINE: ATTR = bits 0-7, COMP = bits 8-10, SIZE = bits 12-14, TYPE = bits 16-18. SIZE_x/TYPE_x values are pre-shifted; COMP__MIN/__MAX (1, 4) are not. */
+#define NVC0_3D_VTX_ATTR_DEFINE                                        0x0000114c
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK                     0x000000ff
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT                    0
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK                     0x00000700
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT                    8
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN                      0x00000001
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX                      0x00000004
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK                     0x00007000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT                    12
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8                         0x00001000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16                                0x00002000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32                                0x00004000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK                     0x00070000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT                    16
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM                     0x00010000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM                     0x00020000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT                      0x00030000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT                      0x00040000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED                   0x00050000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED                   0x00060000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT                     0x00070000
+/* VTX_ATTR_DATA: __LEN = 4 consecutive words starting at 0x1150. */
+#define NVC0_3D_VTX_ATTR_DATA(i0)                             (0x00001150 + 0x4*(i0))
+#define NVC0_3D_VTX_ATTR_DATA__ESIZE                           0x00000004
+#define NVC0_3D_VTX_ATTR_DATA__LEN                             0x00000004
+/* VERTEX_ATTRIB_FORMAT (__LEN = 0x20 words at 0x1160): BUFFER = bits 0-5, CONST = bit 6, OFFSET = bits 7-20, SIZE = bits 21-26, TYPE = bits 27-30, BGRA = bit 31; enums pre-shifted. */
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0)                      (0x00001160 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE                    0x00000004
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN                      0x00000020
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK              0x0000003f
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT             0
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST                     0x00000040
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK              0x001fff80
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT             7
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK                        0x07e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT               21
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32          0x00200000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32             0x00400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16          0x00600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32                        0x00800000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16             0x00a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8              0x01400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16                        0x01e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32                   0x02400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8                        0x02600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8                  0x03000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16                   0x03600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8                    0x03a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_2_10_10_10           0x06000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK                        0x78000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT               27
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM                        0x08000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM                        0x10000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT                 0x18000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT                 0x20000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED              0x28000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED              0x30000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT                        0x38000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA                      0x80000000
+/* RT_CONTROL bitfield: COUNT = bits 0-3, then eight 3-bit MAPn fields at bit 4 + 3n (MAP0 = bits 4-6 ... MAP7 = bits 25-27). */
+#define NVC0_3D_RT_CONTROL                                     0x0000121c
+#define NVC0_3D_RT_CONTROL_COUNT__MASK                         0x0000000f
+#define NVC0_3D_RT_CONTROL_COUNT__SHIFT                                0
+#define NVC0_3D_RT_CONTROL_MAP0__MASK                          0x00000070
+#define NVC0_3D_RT_CONTROL_MAP0__SHIFT                         4
+#define NVC0_3D_RT_CONTROL_MAP1__MASK                          0x00000380
+#define NVC0_3D_RT_CONTROL_MAP1__SHIFT                         7
+#define NVC0_3D_RT_CONTROL_MAP2__MASK                          0x00001c00
+#define NVC0_3D_RT_CONTROL_MAP2__SHIFT                         10
+#define NVC0_3D_RT_CONTROL_MAP3__MASK                          0x0000e000
+#define NVC0_3D_RT_CONTROL_MAP3__SHIFT                         13
+#define NVC0_3D_RT_CONTROL_MAP4__MASK                          0x00070000
+#define NVC0_3D_RT_CONTROL_MAP4__SHIFT                         16
+#define NVC0_3D_RT_CONTROL_MAP5__MASK                          0x00380000
+#define NVC0_3D_RT_CONTROL_MAP5__SHIFT                         19
+#define NVC0_3D_RT_CONTROL_MAP6__MASK                          0x01c00000
+#define NVC0_3D_RT_CONTROL_MAP6__SHIFT                         22
+#define NVC0_3D_RT_CONTROL_MAP7__MASK                          0x0e000000
+#define NVC0_3D_RT_CONTROL_MAP7__SHIFT                         25
+
+#define NVC0_3D_ZETA_HORIZ                                     0x00001228
+
+#define NVC0_3D_ZETA_VERT                                      0x0000122c
+
+#define NVC0_3D_ZETA_ARRAY_MODE                                        0x00001230
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK                   0x0000ffff
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT                  0
+#define NVC0_3D_ZETA_ARRAY_MODE_UNK                            0x00010000
+
+#define NVC0_3D_LINKED_TSC                                     0x00001234
+
+#define NVC0_3D_FP_RESULT_COUNT                                        0x00001298
+
+#define NVC0_3D_DEPTH_TEST_ENABLE                              0x000012cc
+/* D3D_FILL_MODE takes one of three small enum values (1..3). */
+#define NVC0_3D_D3D_FILL_MODE                                  0x000012d0
+#define NVC0_3D_D3D_FILL_MODE_POINT                            0x00000001
+#define NVC0_3D_D3D_FILL_MODE_WIREFRAME                                0x00000002
+#define NVC0_3D_D3D_FILL_MODE_SOLID                            0x00000003
+/* NOTE(review): SHADE_MODEL values equal OpenGL's GL_FLAT (0x1d00) and GL_SMOOTH (0x1d01) -- confirm this is intentional. */
+#define NVC0_3D_SHADE_MODEL                                    0x000012d4
+#define NVC0_3D_SHADE_MODEL_FLAT                               0x00001d00
+#define NVC0_3D_SHADE_MODEL_SMOOTH                             0x00001d01
+
+#define NVC0_3D_BLEND_INDEPENDENT                              0x000012e4
+
+#define NVC0_3D_DEPTH_WRITE_ENABLE                             0x000012e8
+
+#define NVC0_3D_ALPHA_TEST_ENABLE                              0x000012ec
+/* VB_ELEMENT_U8_SETUP bitfield: COUNT = bits 0-29, OFFSET = bits 30-31. */
+#define NVC0_3D_VB_ELEMENT_U8_SETUP                            0x00001300
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK               0xc0000000
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT              30
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK                        0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT               0
+/* VB_ELEMENT_U8: four 8-bit fields I0..I3, one per byte from least to most significant. */
+#define NVC0_3D_VB_ELEMENT_U8                                  0x00001304
+#define NVC0_3D_VB_ELEMENT_U8_I0__MASK                         0x000000ff
+#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT                                0
+#define NVC0_3D_VB_ELEMENT_U8_I1__MASK                         0x0000ff00
+#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT                                8
+#define NVC0_3D_VB_ELEMENT_U8_I2__MASK                         0x00ff0000
+#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT                                16
+#define NVC0_3D_VB_ELEMENT_U8_I3__MASK                         0xff000000
+#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT                                24
+
+#define NVC0_3D_D3D_CULL_MODE                                  0x00001308
+#define NVC0_3D_D3D_CULL_MODE_NONE                             0x00000001
+#define NVC0_3D_D3D_CULL_MODE_FRONT                            0x00000002
+#define NVC0_3D_D3D_CULL_MODE_BACK                             0x00000003
+/* Comparison functions take the values 0x200..0x207 (NEVER..ALWAYS). NOTE(review): same numbers as OpenGL's GL_NEVER..GL_ALWAYS -- confirm. */
+#define NVC0_3D_DEPTH_TEST_FUNC                                        0x0000130c
+#define NVC0_3D_DEPTH_TEST_FUNC_NEVER                          0x00000200
+#define NVC0_3D_DEPTH_TEST_FUNC_LESS                           0x00000201
+#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL                          0x00000202
+#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL                         0x00000203
+#define NVC0_3D_DEPTH_TEST_FUNC_GREATER                                0x00000204
+#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL                       0x00000205
+#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL                         0x00000206
+#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS                         0x00000207
+
+#define NVC0_3D_ALPHA_TEST_REF                                 0x00001310
+/* ALPHA_TEST_FUNC repeats the DEPTH_TEST_FUNC value table. */
+#define NVC0_3D_ALPHA_TEST_FUNC                                        0x00001314
+#define NVC0_3D_ALPHA_TEST_FUNC_NEVER                          0x00000200
+#define NVC0_3D_ALPHA_TEST_FUNC_LESS                           0x00000201
+#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL                          0x00000202
+#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL                         0x00000203
+#define NVC0_3D_ALPHA_TEST_FUNC_GREATER                                0x00000204
+#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL                       0x00000205
+#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL                         0x00000206
+#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS                         0x00000207
+
+#define NVC0_3D_BLEND_COLOR(i0)                                       (0x0000131c + 0x4*(i0))
+#define NVC0_3D_BLEND_COLOR__ESIZE                             0x00000004
+#define NVC0_3D_BLEND_COLOR__LEN                               0x00000004
+/* TSC_FLUSH and TIC_FLUSH share one layout: UNK0 = bit 0, UNK1 = bits 4-25. */
+#define NVC0_3D_TSC_FLUSH                                      0x00001330
+#define NVC0_3D_TSC_FLUSH_UNK0                                 0x00000001
+#define NVC0_3D_TSC_FLUSH_UNK1__MASK                           0x03fffff0
+#define NVC0_3D_TSC_FLUSH_UNK1__SHIFT                          4
+
+#define NVC0_3D_TIC_FLUSH                                      0x00001334
+#define NVC0_3D_TIC_FLUSH_UNK0                                 0x00000001
+#define NVC0_3D_TIC_FLUSH_UNK1__MASK                           0x03fffff0
+#define NVC0_3D_TIC_FLUSH_UNK1__SHIFT                          4
+/* TEX_CACHE_CTL: UNK1 = bits 4-5. */
+#define NVC0_3D_TEX_CACHE_CTL                                  0x00001338
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK                       0x00000030
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT                      4
+/* Blend state, RGB registers then ALPHA registers; both BLEND_EQUATION_* registers take the same five values (0x8006..0x800b). */
+#define NVC0_3D_BLEND_EQUATION_RGB                             0x00001340
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD                    0x00008006
+#define NVC0_3D_BLEND_EQUATION_RGB_MIN                         0x00008007
+#define NVC0_3D_BLEND_EQUATION_RGB_MAX                         0x00008008
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT               0x0000800a
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT       0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_RGB                             0x00001344
+
+#define NVC0_3D_BLEND_FUNC_DST_RGB                             0x00001348
+/* NOTE(review): equation values match OpenGL's GL_FUNC_ADD, GL_MIN, GL_MAX, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT -- confirm. */
+#define NVC0_3D_BLEND_EQUATION_ALPHA                           0x0000134c
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD                  0x00008006
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN                       0x00008007
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX                       0x00008008
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT             0x0000800a
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT     0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_ALPHA                           0x00001350
+/* NOTE(review): DST_ALPHA sits at 0x1358, leaving 0x1354 unnamed, whereas the RGB registers are contiguous -- confirm against the database. */
+#define NVC0_3D_BLEND_FUNC_DST_ALPHA                           0x00001358
+
+#define NVC0_3D_BLEND_ENABLE(i0)                              (0x00001360 + 0x4*(i0))
+#define NVC0_3D_BLEND_ENABLE__ESIZE                            0x00000004
+#define NVC0_3D_BLEND_ENABLE__LEN                              0x00000008
+/* STENCIL_FRONT_* registers occupy eight consecutive words, 0x1380..0x139c. */
+#define NVC0_3D_STENCIL_FRONT_ENABLE                           0x00001380
+/* OP_FAIL, OP_ZFAIL and OP_ZPASS accept the same eight values. NOTE(review): they match OpenGL's GL_ZERO, GL_INVERT, GL_KEEP, ... -- confirm. */
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL                          0x00001384
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO                     0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT                   0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP                     0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE                  0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR                     0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR                     0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP                        0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP                        0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL                         0x00001388
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO                    0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT                  0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP                    0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE                 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR                    0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR                    0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP               0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP               0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS                         0x0000138c
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO                    0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT                  0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP                    0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE                 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR                    0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR                    0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP               0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP               0x00008508
+/* FUNC_FUNC takes the comparison values 0x200..0x207 (NEVER..ALWAYS). */
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC                                0x00001390
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER                  0x00000200
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS                   0x00000201
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL                  0x00000202
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL                 0x00000203
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER                        0x00000204
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL               0x00000205
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL                 0x00000206
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS                 0x00000207
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_REF                         0x00001394
+
+#define NVC0_3D_STENCIL_FRONT_MASK                             0x00001398
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_MASK                                0x0000139c
+
+/* Defines at offsets 0x13a8-0x1510.
+ * FRAG_COLOR_CLAMP_EN_0.._7 are single bits spaced one nibble apart
+ * (bits 0, 4, ..., 28); VP_CLIP_DISTANCE_ENABLE_0.._7 are the eight
+ * consecutive low bits. GP_VERTEX_OUTPUT_COUNT and CLIPID_HEIGHT carry
+ * __MIN/__MAX bounds alongside their offset. */
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN                            0x000013a8
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0                          0x00000001
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1                          0x00000010
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2                          0x00000100
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3                          0x00001000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4                          0x00010000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5                          0x00100000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6                          0x01000000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7                          0x10000000
+
+#define NVC0_3D_Y_ORIGIN_BOTTOM                                        0x000013ac
+
+#define NVC0_3D_LINE_WIDTH                                     0x000013b0
+
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT                         0x00001420
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN                    0x00000001
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX                    0x00000400
+
+#define NVC0_3D_FENCE_UNK                                      0x0000142c
+
+#define NVC0_3D_VB_ELEMENT_BASE                                        0x00001434
+
+#define NVC0_3D_VB_INSTANCE_BASE                               0x00001438
+
+#define NVC0_3D_CODE_CB_FLUSH                                  0x00001440
+
+#define NVC0_3D_CLIPID_HEIGHT                                  0x00001504
+#define NVC0_3D_CLIPID_HEIGHT__MAX                             0x00002000
+
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE                                0x00001510
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_0                      0x00000001
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_1                      0x00000002
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_2                      0x00000004
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_3                      0x00000008
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_4                      0x00000010
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_5                      0x00000020
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_6                      0x00000040
+#define NVC0_3D_VP_CLIP_DISTANCE_ENABLE_7                      0x00000080
+
+/* Defines at offsets 0x1514-0x157c.
+ * MULTISAMPLE_CTRL has two flag bits (bit 0 and bit 4); COND_MODE has five
+ * named values, 0-4. The COND, TSC and TIC groups each expose an
+ * ADDRESS_HIGH/ADDRESS_LOW pair; TSC_ADDRESS_LOW carries an __ALIGN of 0x20
+ * and TSC_LIMIT a __MAX of 0x1fff. */
+#define NVC0_3D_SAMPLECNT_ENABLE                               0x00001514
+
+#define NVC0_3D_POINT_SIZE                                     0x00001518
+
+#define NVC0_3D_POINT_SPRITE_ENABLE                            0x00001520
+
+#define NVC0_3D_SAMPLECNT_RESET                                        0x00001530
+
+#define NVC0_3D_MULTISAMPLE_ZETA_ENABLE                                0x00001534
+
+#define NVC0_3D_ZETA_ENABLE                                    0x00001538
+
+#define NVC0_3D_MULTISAMPLE_CTRL                               0x0000153c
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE             0x00000001
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE                  0x00000010
+
+#define NVC0_3D_COND_ADDRESS_HIGH                              0x00001550
+
+#define NVC0_3D_COND_ADDRESS_LOW                               0x00001554
+
+#define NVC0_3D_COND_MODE                                      0x00001558
+#define NVC0_3D_COND_MODE_NEVER                                        0x00000000
+#define NVC0_3D_COND_MODE_ALWAYS                               0x00000001
+#define NVC0_3D_COND_MODE_RES_NON_ZERO                         0x00000002
+#define NVC0_3D_COND_MODE_EQUAL                                        0x00000003
+#define NVC0_3D_COND_MODE_NOT_EQUAL                            0x00000004
+
+#define NVC0_3D_TSC_ADDRESS_HIGH                               0x0000155c
+
+#define NVC0_3D_TSC_ADDRESS_LOW                                        0x00001560
+#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN                         0x00000020
+
+#define NVC0_3D_TSC_LIMIT                                      0x00001564
+#define NVC0_3D_TSC_LIMIT__MAX                                 0x00001fff
+
+#define NVC0_3D_POLYGON_OFFSET_FACTOR                          0x0000156c
+
+#define NVC0_3D_LINE_SMOOTH_ENABLE                             0x00001570
+
+#define NVC0_3D_TIC_ADDRESS_HIGH                               0x00001574
+
+#define NVC0_3D_TIC_ADDRESS_LOW                                        0x00001578
+
+#define NVC0_3D_TIC_LIMIT                                      0x0000157c
+
+/* STENCIL_TWO_SIDE_ENABLE and the STENCIL_BACK_* defines, offsets
+ * 0x1594-0x15a4. The OP_FAIL/OP_ZFAIL/OP_ZPASS and FUNC_FUNC value lists
+ * use the same numeric values as their STENCIL_FRONT_* counterparts. */
+#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE                                0x00001594
+
+#define NVC0_3D_STENCIL_BACK_OP_FAIL                           0x00001598
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO                      0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT                    0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP                      0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE                   0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR                      0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR                      0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP                 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP                 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL                          0x0000159c
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO                     0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT                   0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP                     0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE                  0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR                     0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR                     0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP                        0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP                        0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS                          0x000015a0
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO                     0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT                   0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP                     0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE                  0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR                     0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR                     0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP                        0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP                        0x00008508
+
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC                         0x000015a4
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER                   0x00000200
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS                    0x00000201
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL                   0x00000202
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL                  0x00000203
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER                 0x00000204
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL                        0x00000205
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL                  0x00000206
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS                  0x00000207
+
+/* Defines at offsets 0x15b4-0x15d0. MULTISAMPLE_MODE is the only one with
+ * named values: 0-3 for 1X/2XMS/4XMS/8XMS and 0x8-0xa for the combined
+ * *XMS_*XCS modes. */
+#define NVC0_3D_MULTISAMPLE_COLOR_ENABLE                       0x000015b4
+
+#define NVC0_3D_FRAMEBUFFER_SRGB                               0x000015b8
+
+#define NVC0_3D_POLYGON_OFFSET_UNITS                           0x000015bc
+
+#define NVC0_3D_GP_BUILTIN_RESULT_EN                           0x000015cc
+
+#define NVC0_3D_MULTISAMPLE_MODE                               0x000015d0
+#define NVC0_3D_MULTISAMPLE_MODE_1X                            0x00000000
+#define NVC0_3D_MULTISAMPLE_MODE_2XMS                          0x00000001
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS                          0x00000002
+#define NVC0_3D_MULTISAMPLE_MODE_8XMS                          0x00000003
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS_4XCS                     0x00000008
+#define NVC0_3D_MULTISAMPLE_MODE_4XMS_12XCS                    0x00000009
+#define NVC0_3D_MULTISAMPLE_MODE_8XMS_8XCS                     0x0000000a
+
+/* Vertex begin/end and element defines, offsets 0x15d4-0x1618.
+ * There are two BEGIN/END pairs: *_D3D (0x15d4/0x15d8) and *_GL
+ * (0x1618/0x1614). In both, PRIMITIVE is the field in bits 0-27 and
+ * INSTANCE_NEXT is bit 28.
+ * The two PRIMITIVE encodings are NOT interchangeable: POINTS/LINES are 1/2
+ * in the D3D list but 0/1 in the GL list. The remaining names they share
+ * (LINE_STRIP, TRIANGLES, TRIANGLE_STRIP, *_ADJACENCY) have equal values;
+ * LINE_LOOP, TRIANGLE_FAN, QUADS, QUAD_STRIP, POLYGON and PATCHES exist only
+ * in the GL list.
+ * VB_ELEMENT_U16_SETUP: OFFSET is bits 30-31, COUNT is bits 0-29.
+ * VB_ELEMENT_U16: I0 is the low and I1 the high 16 bits. */
+#define NVC0_3D_VERTEX_BEGIN_D3D                               0x000015d4
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK               0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT              0
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS              0x00000001
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES               0x00000002
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP          0x00000003
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES           0x00000004
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP      0x00000005
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY     0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY        0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY    0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT                 0x10000000
+
+#define NVC0_3D_VERTEX_END_D3D                                 0x000015d8
+#define NVC0_3D_VERTEX_END_D3D_UNK0                            0x00000001
+#define NVC0_3D_VERTEX_END_D3D_UNK1                            0x00000002
+
+#define NVC0_3D_EDGEFLAG_ENABLE                                        0x000015e4
+
+#define NVC0_3D_VB_ELEMENT_U32                                 0x000015e8
+
+#define NVC0_3D_VB_ELEMENT_U16_SETUP                           0x000015ec
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK              0xc0000000
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT             30
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK               0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT              0
+
+#define NVC0_3D_VB_ELEMENT_U16                                 0x000015f0
+#define NVC0_3D_VB_ELEMENT_U16_I0__MASK                                0x0000ffff
+#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT                       0
+#define NVC0_3D_VB_ELEMENT_U16_I1__MASK                                0xffff0000
+#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT                       16
+
+#define NVC0_3D_VERTEX_BASE_HIGH                               0x000015f4
+
+#define NVC0_3D_VERTEX_BASE_LOW                                        0x000015f8
+
+#define NVC0_3D_POINT_COORD_REPLACE                            0x00001604
+
+#define NVC0_3D_CODE_ADDRESS_HIGH                              0x00001608
+
+#define NVC0_3D_CODE_ADDRESS_LOW                               0x0000160c
+
+#define NVC0_3D_VERTEX_END_GL                                  0x00001614
+#define NVC0_3D_VERTEX_END_GL_UNK0                             0x00000001
+#define NVC0_3D_VERTEX_END_GL_UNK1                             0x00000002
+
+#define NVC0_3D_VERTEX_BEGIN_GL                                        0x00001618
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK                        0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT               0
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS               0x00000000
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES                        0x00000001
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP            0x00000002
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP           0x00000003
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES            0x00000004
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP       0x00000005
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN         0x00000006
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS                        0x00000007
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP           0x00000008
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON              0x00000009
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY      0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY  0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY     0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES              0x0000000e
+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT                  0x10000000
+
+/* Defines at offsets 0x1640-0x1880.
+ * VP_GP_BUILTIN_ATTR_EN flags are single bits one nibble apart (bits 0, 4,
+ * 8, 12). Three indexed defines use a 4-byte step: POLYGON_STIPPLE_PATTERN
+ * (__LEN 0x20), STRMOUT_UNK1780 (__LEN 4) and VERTEX_ARRAY_PER_INSTANCE
+ * (__LEN 0x20). */
+#define NVC0_3D_VERTEX_DATA                                    0x00001640
+
+#define NVC0_3D_PRIM_RESTART_ENABLE                            0x00001644
+
+#define NVC0_3D_PRIM_RESTART_INDEX                             0x00001648
+
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN                          0x0000164c
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID                        0x00000001
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID              0x00000010
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID             0x00000100
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12                    0x00001000
+
+#define NVC0_3D_POINT_SMOOTH_ENABLE                            0x00001658
+
+#define NVC0_3D_POINT_SPRITE_CTRL                              0x00001660
+
+#define NVC0_3D_TEX_MISC                                       0x00001664
+#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP                     0x00000004
+
+#define NVC0_3D_LINE_STIPPLE_ENABLE                            0x0000166c
+
+#define NVC0_3D_LINE_STIPPLE_PATTERN                           0x00001680
+
+#define NVC0_3D_PROVOKING_VERTEX_LAST                          0x00001684
+
+#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE                         0x00001688
+
+#define NVC0_3D_POLYGON_STIPPLE_ENABLE                         0x0000168c
+
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0)                   (0x00001700 + 0x4*(i0))
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE                 0x00000004
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN                   0x00000020
+
+#define NVC0_3D_STRMOUT_UNK1780(i0)                           (0x00001780 + 0x4*(i0))
+#define NVC0_3D_STRMOUT_UNK1780__ESIZE                         0x00000004
+#define NVC0_3D_STRMOUT_UNK1780__LEN                           0x00000004
+
+#define NVC0_3D_UNK17BC_ADDRESS_HIGH                           0x000017bc
+
+#define NVC0_3D_UNK17BC_ADDRESS_LOW                            0x000017c0
+
+#define NVC0_3D_UNK17BC_LIMIT                                  0x000017c4
+
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0)                 (0x00001880 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE               0x00000004
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN                 0x00000020
+
+/* Defines at offsets 0x1910-0x1984.
+ * FRONT_FACE values are 0x900/0x901; CULL_FACE values are 0x404/0x405/0x408.
+ * VIEW_VOLUME_CLIP_CTRL only has UNK<n> flag names, where <n> is the bit
+ * index of the flag (UNK7 = 0x80, UNK13 = 0x2000). FP_ZORDER_CTRL_0/_1 are
+ * bits 0 and 4. CLIPID_WIDTH carries __MAX 0x2000 and __ALIGN 0x40. */
+#define NVC0_3D_VP_POINT_SIZE_EN                               0x00001910
+
+#define NVC0_3D_CULL_FACE_ENABLE                               0x00001918
+
+#define NVC0_3D_FRONT_FACE                                     0x0000191c
+#define NVC0_3D_FRONT_FACE_CW                                  0x00000900
+#define NVC0_3D_FRONT_FACE_CCW                                 0x00000901
+
+#define NVC0_3D_CULL_FACE                                      0x00001920
+#define NVC0_3D_CULL_FACE_FRONT                                        0x00000404
+#define NVC0_3D_CULL_FACE_BACK                                 0x00000405
+#define NVC0_3D_CULL_FACE_FRONT_AND_BACK                       0x00000408
+
+#define NVC0_3D_VIEWPORT_TRANSFORM_EN                          0x0000192c
+
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL                          0x0000193c
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0                     0x00000001
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1                     0x00000002
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2                     0x00000004
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK3                     0x00000008
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK4                     0x00000010
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7                     0x00000080
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10                    0x00000400
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11                    0x00000800
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12                    0x00001000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK13                    0x00002000
+
+#define NVC0_3D_VIEWPORT_CLIP_RECTS_EN                         0x0000194c
+
+#define NVC0_3D_VIEWPORT_CLIP_MODE                             0x00001950
+#define NVC0_3D_VIEWPORT_CLIP_MODE_INSIDE_ANY                  0x00000000
+#define NVC0_3D_VIEWPORT_CLIP_MODE_OUTSIDE_ALL                 0x00000001
+#define NVC0_3D_VIEWPORT_CLIP_MODE_NEVER                       0x00000002
+
+#define NVC0_3D_FP_ZORDER_CTRL                                 0x0000196c
+#define NVC0_3D_FP_ZORDER_CTRL_0                               0x00000001
+#define NVC0_3D_FP_ZORDER_CTRL_1                               0x00000010
+
+#define NVC0_3D_CLIPID_ENABLE                                  0x0000197c
+
+#define NVC0_3D_CLIPID_WIDTH                                   0x00001980
+#define NVC0_3D_CLIPID_WIDTH__MAX                              0x00002000
+#define NVC0_3D_CLIPID_WIDTH__ALIGN                            0x00000040
+
+#define NVC0_3D_CLIPID_ID                                      0x00001984
+
+/* Defines at offsets 0x19a8-0x1b0c.
+ * FP_CONTROL flags are bits 0, 8 and 20. LOGIC_OP values run 0x1500-0x150f.
+ * CLEAR_BUFFERS packs six flag bits (Z, S, R, G, B, A = bits 0-5), an RT
+ * field in bits 6-9 and a LAYER field in bits 10-20.
+ * COLOR_MASK(i0) steps by 4 bytes (__LEN 8); its R/G/B/A masks cover one
+ * nibble each. QUERY_GET named values are complete 32-bit words rather than
+ * single-bit flags. */
+#define NVC0_3D_FP_CONTROL                                     0x000019a8
+#define NVC0_3D_FP_CONTROL_MULTIPLE_RESULTS                    0x00000001
+#define NVC0_3D_FP_CONTROL_EXPORTS_Z                           0x00000100
+#define NVC0_3D_FP_CONTROL_USES_KIL                            0x00100000
+
+#define NVC0_3D_DEPTH_BOUNDS_EN                                        0x000019bc
+
+#define NVC0_3D_LOGIC_OP_ENABLE                                        0x000019c4
+
+#define NVC0_3D_LOGIC_OP                                       0x000019c8
+#define NVC0_3D_LOGIC_OP_CLEAR                                 0x00001500
+#define NVC0_3D_LOGIC_OP_AND                                   0x00001501
+#define NVC0_3D_LOGIC_OP_AND_REVERSE                           0x00001502
+#define NVC0_3D_LOGIC_OP_COPY                                  0x00001503
+#define NVC0_3D_LOGIC_OP_AND_INVERTED                          0x00001504
+#define NVC0_3D_LOGIC_OP_NOOP                                  0x00001505
+#define NVC0_3D_LOGIC_OP_XOR                                   0x00001506
+#define NVC0_3D_LOGIC_OP_OR                                    0x00001507
+#define NVC0_3D_LOGIC_OP_NOR                                   0x00001508
+#define NVC0_3D_LOGIC_OP_EQUIV                                 0x00001509
+#define NVC0_3D_LOGIC_OP_INVERT                                        0x0000150a
+#define NVC0_3D_LOGIC_OP_OR_REVERSE                            0x0000150b
+#define NVC0_3D_LOGIC_OP_COPY_INVERTED                         0x0000150c
+#define NVC0_3D_LOGIC_OP_OR_INVERTED                           0x0000150d
+#define NVC0_3D_LOGIC_OP_NAND                                  0x0000150e
+#define NVC0_3D_LOGIC_OP_SET                                   0x0000150f
+
+#define NVC0_3D_CLEAR_BUFFERS                                  0x000019d0
+#define NVC0_3D_CLEAR_BUFFERS_Z                                        0x00000001
+#define NVC0_3D_CLEAR_BUFFERS_S                                        0x00000002
+#define NVC0_3D_CLEAR_BUFFERS_R                                        0x00000004
+#define NVC0_3D_CLEAR_BUFFERS_G                                        0x00000008
+#define NVC0_3D_CLEAR_BUFFERS_B                                        0x00000010
+#define NVC0_3D_CLEAR_BUFFERS_A                                        0x00000020
+#define NVC0_3D_CLEAR_BUFFERS_RT__MASK                         0x000003c0
+#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT                                6
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK                      0x001ffc00
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT                     10
+
+#define NVC0_3D_COLOR_MASK(i0)                                (0x00001a00 + 0x4*(i0))
+#define NVC0_3D_COLOR_MASK__ESIZE                              0x00000004
+#define NVC0_3D_COLOR_MASK__LEN                                        0x00000008
+#define NVC0_3D_COLOR_MASK_R                                   0x0000000f
+#define NVC0_3D_COLOR_MASK_G                                   0x000000f0
+#define NVC0_3D_COLOR_MASK_B                                   0x00000f00
+#define NVC0_3D_COLOR_MASK_A                                   0x0000f000
+
+#define NVC0_3D_QUERY_ADDRESS_HIGH                             0x00001b00
+
+#define NVC0_3D_QUERY_ADDRESS_LOW                              0x00001b04
+
+#define NVC0_3D_QUERY_SEQUENCE                                 0x00001b08
+
+#define NVC0_3D_QUERY_GET                                      0x00001b0c
+#define NVC0_3D_QUERY_GET_FENCE                                        0x1000f010
+#define NVC0_3D_QUERY_GET_SAMPLE_COUNT                         0x0100f002
+#define NVC0_3D_QUERY_GET_TFB                                  0x05805002
+#define NVC0_3D_QUERY_GET_GENERATED_PRIMS                      0x06805002
+#define NVC0_3D_QUERY_GET_UNK00005002                          0x00005002
+
+/* Indexed vertex-array and per-index blend defines.
+ * VERTEX_ARRAY_FETCH(i0) and VERTEX_ARRAY_DIVISOR(i0) share a 0x10-byte step
+ * (__LEN 0x20); DIVISOR sits 0xc bytes after FETCH for the same index.
+ * FETCH packs STRIDE in bits 0-11 and ENABLE in bit 12.
+ * IBLEND(i0) steps by 0x20 bytes (__LEN 8); the IBLEND_* members are at
+ * +0x04 (EQUATION_RGB) through +0x18 (FUNC_DST_ALPHA) from IBLEND(i0). */
+#define NVC0_3D_VERTEX_ARRAY_FETCH(i0)                        (0x00001c00 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE                      0x00000010
+#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN                                0x00000020
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK                        0x00000fff
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT               0
+#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE                      0x00001000
+
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0)                      (0x00001c0c + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE                    0x00000010
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN                      0x00000020
+
+#define NVC0_3D_IBLEND(i0)                                    (0x00001e00 + 0x20*(i0))
+#define NVC0_3D_IBLEND__ESIZE                                  0x00000020
+#define NVC0_3D_IBLEND__LEN                                    0x00000008
+
+#define NVC0_3D_IBLEND_EQUATION_RGB(i0)                               (0x00001e04 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD                   0x00008006
+#define NVC0_3D_IBLEND_EQUATION_RGB_MIN                                0x00008007
+#define NVC0_3D_IBLEND_EQUATION_RGB_MAX                                0x00008008
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT              0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT      0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0)                               (0x00001e08 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0)                               (0x00001e0c + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0)                     (0x00001e10 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD                 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN                      0x00008007
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX                      0x00008008
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT            0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT    0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0)                     (0x00001e14 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0)                     (0x00001e18 + 0x20*(i0))
+
+/* Indexed SP, texture-limit, constant-buffer and bind defines.
+ * SP(i0) steps by 0x40 bytes (__LEN 6); SP_SELECT is at +0x0, SP_START_ID at
+ * +0x4 and SP_GPR_ALLOC at +0xc. SP_SELECT packs ENABLE in bit 0 and PROGRAM
+ * in bits 4-6; the PROGRAM_* values are already shifted into that field.
+ * BIND_TSC (0x2400), BIND_TIC (0x2404) and CB_BIND (0x2410) all step by 0x20
+ * bytes with __LEN 5, so they interleave within the same 0x20-byte slots.
+ * BIND_TIC is listed before BIND_TSC although its offset is the higher one. */
+#define NVC0_3D_SP(i0)                                        (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP__ESIZE                                      0x00000040
+#define NVC0_3D_SP__LEN                                                0x00000006
+
+#define NVC0_3D_SP_SELECT(i0)                                 (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP_SELECT_ENABLE                               0x00000001
+#define NVC0_3D_SP_SELECT_PROGRAM__MASK                                0x00000070
+#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT                       4
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_A                         0x00000000
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_B                         0x00000010
+#define NVC0_3D_SP_SELECT_PROGRAM_TCP                          0x00000020
+#define NVC0_3D_SP_SELECT_PROGRAM_TEP                          0x00000030
+#define NVC0_3D_SP_SELECT_PROGRAM_GP                           0x00000040
+#define NVC0_3D_SP_SELECT_PROGRAM_FP                           0x00000050
+
+#define NVC0_3D_SP_START_ID(i0)                                       (0x00002004 + 0x40*(i0))
+
+#define NVC0_3D_SP_GPR_ALLOC(i0)                              (0x0000200c + 0x40*(i0))
+
+#define NVC0_3D_TEX_LIMITS(i0)                                (0x00002200 + 0x10*(i0))
+#define NVC0_3D_TEX_LIMITS__ESIZE                              0x00000010
+#define NVC0_3D_TEX_LIMITS__LEN                                        0x00000005
+
+#define NVC0_3D_CB_SIZE                                                0x00002380
+
+#define NVC0_3D_CB_ADDRESS_HIGH                                        0x00002384
+
+#define NVC0_3D_CB_ADDRESS_LOW                                 0x00002388
+
+#define NVC0_3D_CB_POS                                         0x0000238c
+
+#define NVC0_3D_CB_DATA(i0)                                   (0x00002390 + 0x4*(i0))
+#define NVC0_3D_CB_DATA__ESIZE                                 0x00000004
+#define NVC0_3D_CB_DATA__LEN                                   0x00000010
+
+#define NVC0_3D_BIND_TIC(i0)                                  (0x00002404 + 0x20*(i0))
+#define NVC0_3D_BIND_TIC__ESIZE                                        0x00000020
+#define NVC0_3D_BIND_TIC__LEN                                  0x00000005
+
+#define NVC0_3D_BIND_TSC(i0)                                  (0x00002400 + 0x20*(i0))
+#define NVC0_3D_BIND_TSC__ESIZE                                        0x00000020
+#define NVC0_3D_BIND_TSC__LEN                                  0x00000005
+
+#define NVC0_3D_CB_BIND(i0)                                   (0x00002410 + 0x20*(i0))
+#define NVC0_3D_CB_BIND__ESIZE                                 0x00000020
+#define NVC0_3D_CB_BIND__LEN                                   0x00000005
+
+#define NVC0_3D_TFB_VARYING_LOCS(i0)                          (0x00002800 + 0x4*(i0))
+#define NVC0_3D_TFB_VARYING_LOCS__ESIZE                                0x00000004
+#define NVC0_3D_TFB_VARYING_LOCS__LEN                          0x00000080
+
+/* Last defines of this header, offsets 0x3808-0x3880.
+ * POLYGON_MODE_FRONT and POLYGON_MODE_BACK share the same three values
+ * (POINT/LINE/FILL = 0x1b00/0x1b01/0x1b02); the other defines here have no
+ * named values. */
+#define NVC0_3D_COLOR_MASK_BROADCAST                           0x00003808
+
+#define NVC0_3D_VERTEX_ARRAY_SELECT                            0x00003820
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH                                0x00003824
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW                         0x00003828
+
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH                                0x0000382c
+
+#define NVC0_3D_VERTEX_ARRAY_START_LOW                         0x00003830
+
+#define NVC0_3D_BLEND_ENABLES                                  0x00003858
+
+#define NVC0_3D_POLYGON_MODE_FRONT                             0x00003868
+#define NVC0_3D_POLYGON_MODE_FRONT_POINT                       0x00001b00
+#define NVC0_3D_POLYGON_MODE_FRONT_LINE                                0x00001b01
+#define NVC0_3D_POLYGON_MODE_FRONT_FILL                                0x00001b02
+
+#define NVC0_3D_POLYGON_MODE_BACK                              0x00003870
+#define NVC0_3D_POLYGON_MODE_BACK_POINT                                0x00001b00
+#define NVC0_3D_POLYGON_MODE_BACK_LINE                         0x00001b01
+#define NVC0_3D_POLYGON_MODE_BACK_FILL                         0x00001b02
+
+#define NVC0_3D_GP_SELECT                                      0x00003878
+
+#define NVC0_3D_TEP_SELECT                                     0x00003880
+
+
+#endif /* NVC0_3D_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nvc0/nvc0_3ddefs.xml.h
new file mode 100644 (file)
index 0000000..84b1522
--- /dev/null
@@ -0,0 +1,98 @@
+#ifndef NV_3DDEFS_XML
+#define NV_3DDEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml    (  26312 bytes, from 2010-10-08 10:10:01)
+- copyright.xml  (   6498 bytes, from 2010-10-03 13:18:37)
+- nv_defs.xml    (   4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml  (  16397 bytes, from 2010-10-08 13:30:38)
+- nv_object.xml  (  11249 bytes, from 2010-10-07 15:31:28)
+- nvchipsets.xml (   2824 bytes, from 2010-07-07 13:41:20)
+- nv50_defs.xml  (   4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+/* Blend-factor values. These keep the NV50_3D_ prefix even though this header
+ * is added under the nvc0 driver directory (nv_3ddefs.xml is among its listed
+ * generator inputs). The values fall into three ranges: 0x4000-0x4308,
+ * 0xc001-0xc004 (CONSTANT_*) and 0xc900-0xc903 (SRC1_*). */
+#define NV50_3D_BLEND_FACTOR_ZERO                              0x00004000
+#define NV50_3D_BLEND_FACTOR_ONE                               0x00004001
+#define NV50_3D_BLEND_FACTOR_SRC_COLOR                         0x00004300
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR               0x00004301
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA                         0x00004302
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA               0x00004303
+#define NV50_3D_BLEND_FACTOR_DST_ALPHA                         0x00004304
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA               0x00004305
+#define NV50_3D_BLEND_FACTOR_DST_COLOR                         0x00004306
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR               0x00004307
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE                        0x00004308
+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR                    0x0000c001
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR          0x0000c002
+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA                    0x0000c003
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA          0x0000c004
+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR                                0x0000c900
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR              0x0000c901
+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA                                0x0000c902
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA              0x0000c903
+
+#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
new file mode 100644 (file)
index 0000000..873016f
--- /dev/null
@@ -0,0 +1,166 @@
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#define NOUVEAU_NVC0
+#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_winsys.h"
+#undef NOUVEAU_NVC0
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+/* resource_destroy hook of nvc0_buffer_vtbl.
+ *
+ * Releases the buffer object if one is attached, frees the optional
+ * CPU-side copy (res->data) and finally the resource structure itself.
+ */
+static void
+nvc0_buffer_destroy(struct pipe_screen *pscreen,
+                    struct pipe_resource *presource)
+{
+   struct nvc0_resource *buf = nvc0_resource(presource);
+   struct nouveau_bo *bo = buf->bo;
+   void *shadow = buf->data;
+
+   if (bo != NULL)
+      nouveau_screen_bo_release(pscreen, bo);
+
+   if (shadow != NULL)
+      FREE(shadow);
+
+   FREE(buf);
+}
+
+/* transfer_map hook of nvc0_buffer_vtbl.
+ *
+ * Returns a CPU pointer to the first byte of the transfer (box.x), or NULL
+ * if the buffer object could not be mapped.  Mapping a buffer bound as a
+ * vertex buffer marks the context's vertex data dirty.
+ */
+static void *
+nvc0_buffer_transfer_map(struct pipe_context *pipe,
+                         struct pipe_transfer *transfer)
+{
+   struct nvc0_resource *buf = nvc0_resource(transfer->resource);
+   const unsigned bind = buf->base.bind;
+   uint8_t *base;
+   uint32_t map_flags;
+
+   if (bind & PIPE_BIND_VERTEX_BUFFER)
+      nvc0_context(pipe)->vbo_dirty = TRUE;
+
+   /* Constant buffers are served from the malloc'ed copy made at creation
+    * time; the buffer object is not mapped for them.
+    */
+   if (bind & PIPE_BIND_CONSTANT_BUFFER)
+      return buf->data + transfer->box.x;
+
+   map_flags = nouveau_screen_transfer_flags(transfer->usage);
+   base = nouveau_screen_bo_map_range(pipe->screen, buf->bo,
+                                      transfer->box.x, transfer->box.width,
+                                      map_flags);
+
+   /* NOTE(review): box.x is applied again on top of the mapped pointer -
+    * presumably the helper returns a pointer biased by -offset; confirm in
+    * nouveau_screen.c.
+    */
+   return base ? base + transfer->box.x : NULL;
+}
+
+
+
+/* transfer_flush_region hook of nvc0_buffer_vtbl.
+ *
+ * Flushes box->width bytes of the buffer object, starting at box->x relative
+ * to the start of the transfer (i.e. at transfer->box.x + box->x).
+ */
+static void
+nvc0_buffer_transfer_flush_region(struct pipe_context *pipe,
+                                  struct pipe_transfer *transfer,
+                                  const struct pipe_box *box)
+{
+   struct nvc0_resource *res = nvc0_resource(transfer->resource);
+
+   /* NOTE(review): "USERPSACE" looks like a misspelling of "USERSPACE";
+    * unless a macro with exactly this spelling is defined somewhere, the
+    * NULL-bo early return below is never compiled in - confirm.
+    */
+#ifdef NOUVEAU_USERPSACE_MM
+   if (!res->bo)
+      return;
+#endif
+   nouveau_screen_bo_map_flush_range(pipe->screen,
+                                     res->bo,
+                                     transfer->box.x + box->x,
+                                     box->width);
+}
+
+/* transfer_unmap hook of nvc0_buffer_vtbl.
+ *
+ * Buffers that carry a CPU-side copy (res->data) have nothing to unmap;
+ * all others get their buffer object unmapped.
+ */
+static void
+nvc0_buffer_transfer_unmap(struct pipe_context *pipe,
+                           struct pipe_transfer *transfer)
+{
+   struct nvc0_resource *buf = nvc0_resource(transfer->resource);
+
+   if (!buf->data)
+      nouveau_screen_bo_unmap(pipe->screen, buf->bo);
+}
+
+/* Resource vtable installed on every buffer by nvc0_buffer_create() and
+ * nvc0_user_buffer_create().  Entries are positional; the trailing comments
+ * name the slot each function fills.  Slots without a buffer-specific
+ * implementation use the u_default_* helpers.
+ */
+const struct u_resource_vtbl nvc0_buffer_vtbl =
+{
+   u_default_resource_get_handle,     /* get_handle */
+   nvc0_buffer_destroy,               /* resource_destroy */
+   NULL,                              /* is_resource_referenced */
+   u_default_get_transfer,            /* get_transfer */
+   u_default_transfer_destroy,        /* transfer_destroy */
+   nvc0_buffer_transfer_map,          /* transfer_map */
+   nvc0_buffer_transfer_flush_region, /* transfer_flush_region */
+   nvc0_buffer_transfer_unmap,        /* transfer_unmap */
+   u_default_transfer_inline_write    /* transfer_inline_write */
+};
+
+/* Create a buffer resource described by templ.
+ *
+ * The template is copied into the new resource, the buffer vtable is
+ * installed and a buffer object of templ->width0 bytes is allocated.
+ * Buffers bound as constant buffers additionally get a malloc'ed CPU-side
+ * copy of the same size (res->data), which transfer_map hands out instead
+ * of mapping the buffer object.
+ *
+ * Returns the new resource with a reference count of 1, or NULL if any
+ * allocation fails; nothing is leaked on failure.
+ */
+struct pipe_resource *
+nvc0_buffer_create(struct pipe_screen *pscreen,
+                   const struct pipe_resource *templ)
+{
+   struct nvc0_resource *buffer;
+
+   buffer = CALLOC_STRUCT(nvc0_resource);
+   if (!buffer)
+      return NULL;
+
+   buffer->base = *templ;
+   buffer->vtbl = &nvc0_buffer_vtbl;
+   pipe_reference_init(&buffer->base.reference, 1);
+   buffer->base.screen = pscreen;
+
+   if (buffer->base.bind & PIPE_BIND_CONSTANT_BUFFER) {
+      buffer->data = MALLOC(buffer->base.width0);
+      /* A NULL result is only an error for a non-empty buffer; a zero-sized
+       * allocation may legitimately yield NULL.
+       */
+      if (!buffer->data && buffer->base.width0)
+         goto fail;
+   }
+
+   buffer->bo = nouveau_screen_bo_new(pscreen,
+                                      16,
+                                      buffer->base.usage,
+                                      buffer->base.bind,
+                                      buffer->base.width0);
+   if (buffer->bo == NULL)
+      goto fail;
+
+   return &buffer->base;
+
+fail:
+   /* Release the CPU-side copy too, otherwise it leaks with the struct. */
+   if (buffer->data)
+      FREE(buffer->data);
+   FREE(buffer);
+   return NULL;
+}
+
+
+/* Wrap user memory in a buffer resource.
+ *
+ * ptr/bytes describe the user memory, bind the PIPE_BIND_* flags to store
+ * in the resource.  The resource is set up as an immutable R8_UNORM buffer
+ * of `bytes` width and backed by a buffer object obtained from
+ * nouveau_screen_bo_user().
+ *
+ * Returns the new resource (reference count 1) or NULL on failure.
+ */
+struct pipe_resource *
+nvc0_user_buffer_create(struct pipe_screen *pscreen,
+                        void *ptr,
+                        unsigned bytes,
+                        unsigned bind)
+{
+   struct nvc0_resource *res = CALLOC_STRUCT(nvc0_resource);
+   struct pipe_resource *pres;
+
+   if (res == NULL)
+      return NULL;
+   pres = &res->base;
+
+   pipe_reference_init(&pres->reference, 1);
+   res->vtbl = &nvc0_buffer_vtbl;
+
+   pres->screen = pscreen;
+   pres->format = PIPE_FORMAT_R8_UNORM;
+   pres->usage = PIPE_USAGE_IMMUTABLE;
+   pres->bind = bind;
+   pres->width0 = bytes;
+   pres->height0 = 1;
+   pres->depth0 = 1;
+
+   res->bo = nouveau_screen_bo_user(pscreen, ptr, bytes);
+   if (res->bo == NULL) {
+      FREE(res);
+      return NULL;
+   }
+
+   return pres;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
new file mode 100644 (file)
index 0000000..f718fec
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+
+#include "nvc0_context.h"
+#include "nvc0_screen.h"
+#include "nvc0_resource.h"
+
+#include "nouveau/nouveau_reloc.h"
+
+/* pipe_context::flush implementation.
+ *
+ * - PIPE_FLUSH_TEXTURE_CACHE: emits SERIALIZE followed by TEX_CACHE_CTL.
+ * - fence != NULL: creates a new fence, emits it and stores it in *fence.
+ * - PIPE_FLUSH_SWAPBUFFERS / PIPE_FLUSH_FRAME: fires the command ring.
+ */
+static void
+nvc0_flush(struct pipe_context *pipe, unsigned flags,
+           struct pipe_fence_handle **fence)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+   struct nvc0_screen *screen = ctx->screen;
+   struct nouveau_channel *chan = screen->base.channel;
+   const unsigned fire_mask = PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_FRAME;
+
+   if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+      BEGIN_RING(chan, RING_3D(SERIALIZE), 1);
+      OUT_RING  (chan, 0);
+      BEGIN_RING(chan, RING_3D(TEX_CACHE_CTL), 1);
+      OUT_RING  (chan, 0x00);
+   }
+
+   if (fence != NULL) {
+      nvc0_screen_fence_new(screen, (struct nvc0_fence **)fence, TRUE);
+   }
+
+   if ((flags & fire_mask) != 0) {
+      FIRE_RING(chan);
+   }
+}
+
+/* pipe_context::destroy implementation.
+ *
+ * Destroys the draw module context, detaches this context from the screen
+ * if it is the screen's current one, and frees it.
+ *
+ * NOTE(review): nvc0_create() also stores the context in
+ * channel->user_private, which is not cleared here - confirm nothing
+ * dereferences it after the context is gone.
+ */
+static void
+nvc0_destroy(struct pipe_context *pipe)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+   struct nvc0_screen *screen = ctx->screen;
+
+   draw_destroy(ctx->draw);
+
+   if (screen->cur_ctx == ctx)
+      screen->cur_ctx = NULL;
+
+   FREE(ctx);
+}
+
+/* Create an nvc0 rendering context for pscreen.
+ *
+ * Fills in the pipe_context entry points, registers the context as the
+ * channel's user_private, lets the surface/state/resource modules install
+ * their functions and sets up a draw module context whose rasterize stage
+ * is the nvc0 render stage.
+ *
+ * priv is stored in pipe_context::priv.  Returns NULL if the context
+ * structure cannot be allocated.
+ */
+struct pipe_context *
+nvc0_create(struct pipe_screen *pscreen, void *priv)
+{
+   struct nvc0_screen *screen = nvc0_screen(pscreen);
+   struct nvc0_context *nvc0 = CALLOC_STRUCT(nvc0_context);
+   struct pipe_context *pipe;
+
+   if (nvc0 == NULL)
+      return NULL;
+   pipe = &nvc0->pipe;
+
+   nvc0->screen = screen;
+
+   pipe->winsys = pscreen->winsys;
+   pipe->screen = pscreen;
+   pipe->priv = priv;
+
+   pipe->destroy = nvc0_destroy;
+   pipe->draw_vbo = nvc0_draw_vbo;
+   pipe->clear = nvc0_clear;
+   pipe->flush = nvc0_flush;
+
+   screen->base.channel->user_private = nvc0;
+
+   nvc0_init_surface_functions(nvc0);
+   nvc0_init_state_functions(nvc0);
+   nvc0_init_resource_functions(pipe);
+
+   nvc0->draw = draw_create(pipe);
+   assert(nvc0->draw);
+   draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
+
+   return pipe;
+}
+
+/* One entry of nvc0_context::residents[]: a referenced buffer object plus
+ * the flags passed to nvc0_make_bo_resident() when relocs are emitted.
+ */
+struct resident {
+   struct nouveau_bo *bo;
+   uint32_t flags;
+};
+
+/* Append resource's buffer object to the resident list `ctx`
+ * (an NVC0_BUFCTX_* index), taking a reference on it.  Resources without a
+ * buffer object are ignored.
+ */
+void
+nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx,
+                         struct nvc0_resource *resource, uint32_t flags)
+{
+   struct resident entry;
+
+   if (resource->bo == NULL)
+      return;
+
+   entry.bo = NULL;
+   entry.flags = flags;
+   nouveau_bo_ref(resource->bo, &entry.bo);
+
+   util_dynarray_append(&nvc0->residents[ctx], struct resident, entry);
+}
+
+/* Remove the first entry of resident list `ctx` (an NVC0_BUFCTX_* index)
+ * that refers to resource's buffer object and drop the reference the list
+ * held on it.  Does nothing if no entry matches.
+ *
+ * Removal is swap-with-last: the matched slot is overwritten with the last
+ * entry and the array shrinks by one, so the order of the remaining entries
+ * is not preserved.
+ */
+void
+nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx,
+                         struct nvc0_resource *resource)
+{
+   struct util_dynarray *array = &nvc0->residents[ctx];
+   const unsigned count = array->size / sizeof(struct resident);
+   struct resident *rsd, last;
+   unsigned i;
+
+   for (i = 0; i < count; ++i) {
+      rsd = util_dynarray_element(array, struct resident, i);
+
+      if (rsd->bo != resource->bo)
+         continue;
+
+      /* Drop the reference of the entry being removed, not of whatever
+       * happens to be last in the array.
+       */
+      nouveau_bo_ref(NULL, &rsd->bo);
+
+      /* Shrink the array; the popped entry keeps its reference and moves
+       * into the freed slot unless the match was the last entry itself.
+       */
+      last = util_dynarray_pop(array, struct resident);
+      if (i != count - 1)
+         *rsd = last;
+      break;
+   }
+}
+
+/* Empty resident list `ctx` (an NVC0_BUFCTX_* index), dropping the
+ * reference held on every buffer object in it.
+ */
+void
+nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx)
+{
+   struct util_dynarray *array = &nvc0->residents[ctx];
+   const unsigned count = array->size / sizeof(struct resident);
+   struct resident *entry;
+   unsigned idx;
+
+   for (idx = 0; idx < count; ++idx) {
+      entry = util_dynarray_element(array, struct resident, idx);
+      nouveau_bo_ref(NULL, &entry->bo);
+   }
+
+   util_dynarray_resize(array, 0);
+}
+
+/* Make every buffer object on every resident list resident with the flags
+ * recorded for it, followed by the screen's text, uniforms and txc buffer
+ * objects (read-only).
+ */
+void
+nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
+{
+   unsigned ctx;
+
+   for (ctx = 0; ctx < NVC0_BUFCTX_COUNT; ++ctx) {
+      struct util_dynarray *list = &nvc0->residents[ctx];
+      const unsigned count = list->size / sizeof(struct resident);
+      unsigned idx;
+
+      for (idx = 0; idx < count; ++idx) {
+         struct resident *entry =
+            util_dynarray_element(list, struct resident, idx);
+
+         nvc0_make_bo_resident(nvc0, entry->bo, entry->flags);
+      }
+   }
+
+   nvc0_make_bo_resident(nvc0, nvc0->screen->text, NOUVEAU_BO_RD);
+   nvc0_make_bo_resident(nvc0, nvc0->screen->uniforms, NOUVEAU_BO_RD);
+   nvc0_make_bo_resident(nvc0, nvc0->screen->txc, NOUVEAU_BO_RD);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
new file mode 100644 (file)
index 0000000..0d88445
--- /dev/null
@@ -0,0 +1,218 @@
+#ifndef __NVC0_CONTEXT_H__
+#define __NVC0_CONTEXT_H__
+
+#include <stdio.h>
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
+
+#include "draw/draw_vertex.h"
+
+#include "nvc0_winsys.h"
+#include "nvc0_stateobj.h"
+#include "nvc0_screen.h"
+#include "nvc0_program.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3ddefs.xml.h"
+#include "nvc0_3d.xml.h"
+#include "nvc0_2d.xml.h"
+#include "nvc0_m2mf.xml.h"
+
+/* Print an error to stderr, prefixed with the calling function's name and
+ * line number.
+ * NOTE(review): the expansion already ends in ';', so `NOUVEAU_ERR(...);`
+ * produces an extra empty statement; that breaks an unbraced if/else around
+ * the call - brace such call sites.
+ */
+#define NOUVEAU_ERR(fmt, args...) \
+   fprintf(stderr, "%s:%d -  "fmt, __FUNCTION__, __LINE__, ##args);
+
+/* Debug print to stdout; expands to nothing unless NOUVEAU_DEBUG is
+ * defined.
+ */
+#ifdef NOUVEAU_DEBUG
+# define NOUVEAU_DBG(args...) printf(args);
+#else
+# define NOUVEAU_DBG(args...)
+#endif
+
+/* One bit per piece of context state.
+ * NOTE(review): presumably OR'ed into nvc0_context::dirty when the
+ * corresponding state changes - confirm against nvc0_state.c.
+ */
+#define NVC0_NEW_BLEND        (1 << 0)
+#define NVC0_NEW_RASTERIZER   (1 << 1)
+#define NVC0_NEW_ZSA          (1 << 2)
+#define NVC0_NEW_VERTPROG     (1 << 3)
+#define NVC0_NEW_TCTLPROG     (1 << 4)
+#define NVC0_NEW_TEVLPROG     (1 << 5)
+#define NVC0_NEW_GMTYPROG     (1 << 6)
+#define NVC0_NEW_FRAGPROG     (1 << 7)
+#define NVC0_NEW_BLEND_COLOUR (1 << 8)
+#define NVC0_NEW_STENCIL_REF  (1 << 9)
+#define NVC0_NEW_CLIP         (1 << 10)
+#define NVC0_NEW_SAMPLE_MASK  (1 << 11)
+#define NVC0_NEW_FRAMEBUFFER  (1 << 12)
+#define NVC0_NEW_STIPPLE      (1 << 13)
+#define NVC0_NEW_SCISSOR      (1 << 14)
+#define NVC0_NEW_VIEWPORT     (1 << 15)
+#define NVC0_NEW_ARRAYS       (1 << 16)
+#define NVC0_NEW_VERTEX       (1 << 17)
+#define NVC0_NEW_CONSTBUF     (1 << 18)
+#define NVC0_NEW_TEXTURES     (1 << 19)
+#define NVC0_NEW_SAMPLERS     (1 << 20)
+
+/* Indices into nvc0_context::residents[]; NVC0_BUFCTX_COUNT is the number
+ * of lists and the size of that array.
+ */
+#define NVC0_BUFCTX_CONSTANT 0
+#define NVC0_BUFCTX_FRAME    1
+#define NVC0_BUFCTX_VERTEX   2
+#define NVC0_BUFCTX_TEXTURES 3
+#define NVC0_BUFCTX_COUNT    4
+
+/* Per-context driver state. */
+struct nvc0_context {
+   /* Must stay the first member: nvc0_context() casts a pipe_context
+    * pointer straight to this structure.
+    */
+   struct pipe_context pipe;
+
+   struct nvc0_screen *screen;
+
+   /* Lists of `struct resident` entries (see nvc0_context.c), one per
+    * NVC0_BUFCTX_* index.
+    */
+   struct util_dynarray residents[NVC0_BUFCTX_COUNT];
+
+   /* NOTE(review): presumably a mask of NVC0_NEW_* bits - confirm. */
+   uint32_t dirty;
+
+   struct {
+      uint32_t instance_bits;
+      uint32_t instance_base;
+      uint8_t num_vtxbufs;
+      uint8_t num_vtxelts;
+      uint8_t num_textures[5];
+      uint8_t num_samplers[5];
+      uint16_t scissor;
+   } state;
+
+   struct nvc0_blend_stateobj *blend;
+   struct nvc0_rasterizer_stateobj *rast;
+   struct nvc0_zsa_stateobj *zsa;
+   struct nvc0_vertex_stateobj *vertex;
+
+   struct nvc0_program *vertprog;
+   struct nvc0_program *tctlprog;
+   struct nvc0_program *tevlprog;
+   struct nvc0_program *gmtyprog;
+   struct nvc0_program *fragprog;
+
+   /* NOTE(review): the first dimension of 5 here and in the texture and
+    * sampler arrays below presumably matches the five program types
+    * above - confirm.
+    */
+   struct pipe_resource *constbuf[5][16];
+   uint16_t constbuf_dirty[5];
+
+   struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+   unsigned num_vtxbufs;
+   struct pipe_index_buffer idxbuf;
+   uint32_t vbo_fifo;
+
+   struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS];
+   unsigned num_textures[5];
+   struct nvc0_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS];
+   unsigned num_samplers[5];
+
+   struct pipe_framebuffer_state framebuffer;
+   struct pipe_blend_color blend_colour;
+   struct pipe_stencil_ref stencil_ref;
+   struct pipe_poly_stipple stipple;
+   struct pipe_scissor_state scissor;
+   struct pipe_viewport_state viewport;
+   struct pipe_clip_state clip;
+
+   unsigned sample_mask;
+
+   /* Set by nvc0_buffer_transfer_map() when a vertex buffer is mapped. */
+   boolean vbo_dirty;
+
+   /* Draw module context created in nvc0_create(). */
+   struct draw_context *draw;
+};
+
+/* Downcast a pipe_context to the nvc0_context that embeds it as its first
+ * member.
+ */
+static INLINE struct nvc0_context *
+nvc0_context(struct pipe_context *pipe)
+{
+   struct nvc0_context *nvc0 = (struct nvc0_context *)pipe;
+
+   return nvc0;
+}
+
+/* Driver surface: currently just the gallium pipe_surface, kept as the
+ * first member so nvc0_surface() can cast to it.
+ */
+struct nvc0_surface {
+   struct pipe_surface pipe;
+};
+
+/* Downcast a pipe_surface to the nvc0_surface that embeds it as its first
+ * member.
+ */
+static INLINE struct nvc0_surface *
+nvc0_surface(struct pipe_surface *ps)
+{
+   struct nvc0_surface *surf = (struct nvc0_surface *)ps;
+
+   return surf;
+}
+
+/* Emit a reloc for bo on the screen's channel with the given flags and no
+ * target location (all pointer/offset arguments are NULL/0).
+ */
+static INLINE void
+nvc0_make_bo_resident(struct nvc0_context *nvc0, struct nouveau_bo *bo,
+                      unsigned flags)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+
+   nouveau_reloc_emit(chan, NULL, 0, NULL, bo, 0, 0, flags, 0, 0);
+}
+
+/* Convenience wrapper: make the buffer object behind res resident. */
+static INLINE void
+nvc0_make_buffer_resident(struct nvc0_context *nvc0,
+                          struct nvc0_resource *res, unsigned flags)
+{
+   struct nouveau_bo *bo = res->bo;
+
+   nvc0_make_bo_resident(nvc0, bo, flags);
+}
+
+/* Prototypes of functions shared between the driver's source files; each
+ * group is headed by the file that defines it.
+ */
+
+/* nvc0_context.c */
+struct pipe_context *nvc0_create(struct pipe_screen *, void *);
+
+/* Resident lists: `ctx` is an NVC0_BUFCTX_* index. */
+void nvc0_bufctx_emit_relocs(struct nvc0_context *);
+void nvc0_bufctx_reset(struct nvc0_context *, int ctx);
+void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx,
+                              struct nvc0_resource *, uint32_t flags);
+void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx,
+                              struct nvc0_resource *);
+
+/* nvc0_draw.c */
+extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
+
+/* nvc0_program.c */
+boolean nvc0_program_translate(struct nvc0_program *);
+void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+
+/* nvc0_shader_state.c */
+void nvc0_vertprog_validate(struct nvc0_context *);
+void nvc0_tctlprog_validate(struct nvc0_context *);
+void nvc0_tevlprog_validate(struct nvc0_context *);
+void nvc0_gmtyprog_validate(struct nvc0_context *);
+void nvc0_fragprog_validate(struct nvc0_context *);
+
+/* nvc0_state.c */
+extern void nvc0_init_state_functions(struct nvc0_context *);
+
+/* nvc0_state_validate.c */
+extern boolean nvc0_state_validate(struct nvc0_context *);
+
+/* nvc0_surface.c */
+extern void nvc0_clear(struct pipe_context *, unsigned buffers,
+                       const float *rgba, double depth, unsigned stencil);
+extern void nvc0_init_surface_functions(struct nvc0_context *);
+
+/* nvc0_tex.c */
+void nvc0_validate_textures(struct nvc0_context *);
+void nvc0_validate_samplers(struct nvc0_context *);
+
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *,
+                         struct pipe_resource *,
+                         const struct pipe_sampler_view *);
+
+/* nvc0_transfer.c */
+void nvc0_m2mf_push_linear(struct nvc0_context *nvc0,
+                           struct nouveau_bo *dst, unsigned domain, int offset,
+                           unsigned size, void *data);
+
+/* nvc0_vbo.c */
+void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements);
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
+
+void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0);
+
+/* nvc0_push.c */
+void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
+void nvc0_push_vbo2(struct nvc0_context *, const struct pipe_draw_info *);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_draw.c b/src/gallium/drivers/nvc0/nvc0_draw.c
new file mode 100644 (file)
index 0000000..ac7e9f6
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nvc0_context.h"
+
+/* Draw-module stage owned by the driver.  `stage` must stay the first
+ * member: nvc0_render_stage() casts a draw_stage pointer to this type.
+ */
+struct nvc0_render_stage {
+   struct draw_stage stage;
+   struct nvc0_context *nvc0;
+};
+
+/* Downcast a draw_stage to the nvc0_render_stage that embeds it as its
+ * first member.
+ */
+static INLINE struct nvc0_render_stage *
+nvc0_render_stage(struct draw_stage *stage)
+{
+   struct nvc0_render_stage *rs = (struct nvc0_render_stage *)stage;
+
+   return rs;
+}
+
+/* draw_stage::point hook.  Not implemented: only reports the call through
+ * NOUVEAU_ERR (function name and line); the primitive is dropped.
+ */
+static void
+nvc0_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+   NOUVEAU_ERR("\n");
+}
+
+/* draw_stage::line hook.  Not implemented: only reports the call through
+ * NOUVEAU_ERR (function name and line); the primitive is dropped.
+ */
+static void
+nvc0_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+   NOUVEAU_ERR("\n");
+}
+
+/* draw_stage::tri hook.  Not implemented: only reports the call through
+ * NOUVEAU_ERR (function name and line); the primitive is dropped.
+ */
+static void
+nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+   NOUVEAU_ERR("\n");
+}
+
+/* draw_stage::flush hook.  Intentionally empty: the stage keeps no state
+ * that would need flushing.
+ */
+static void
+nvc0_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+/* draw_stage::reset_stipple_counter hook.  Not implemented: only reports
+ * the call through NOUVEAU_ERR (function name and line).
+ */
+static void
+nvc0_render_reset_stipple_counter(struct draw_stage *stage)
+{
+   NOUVEAU_ERR("\n");
+}
+
+/* draw_stage::destroy hook: frees the stage allocated by
+ * nvc0_draw_render_stage().
+ */
+static void
+nvc0_render_destroy(struct draw_stage *stage)
+{
+   FREE(stage);
+}
+
+/* Allocate the driver's draw-module render stage for nvc0 and fill in its
+ * hooks.  The stage is bound to nvc0->draw, so that must already be set.
+ *
+ * Returns the embedded draw_stage, or NULL if the allocation fails.  The
+ * stage is released through its destroy hook.
+ */
+struct draw_stage *
+nvc0_draw_render_stage(struct nvc0_context *nvc0)
+{
+   struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage);
+
+   /* Do not write through a failed allocation. */
+   if (!rs)
+      return NULL;
+
+   rs->nvc0 = nvc0;
+   rs->stage.draw = nvc0->draw;
+   rs->stage.destroy = nvc0_render_destroy;
+   rs->stage.point = nvc0_render_point;
+   rs->stage.line = nvc0_render_line;
+   rs->stage.tri = nvc0_render_tri;
+   rs->stage.flush = nvc0_render_flush;
+   rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter;
+
+   return &rs->stage;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
new file mode 100644 (file)
index 0000000..3587097
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_fence.h"
+#include "nvc0_context.h"
+#include "nvc0_screen.h"
+
+#ifdef PIPE_OS_UNIX
+#include <sched.h>
+#endif
+
+/* Allocate a fence for screen and store it in *fence with a reference
+ * count of 1.  If emit is set the fence is emitted right away.
+ *
+ * Returns FALSE (with *fence == NULL) if the allocation fails, TRUE
+ * otherwise.
+ */
+boolean
+nvc0_screen_fence_new(struct nvc0_screen *screen, struct nvc0_fence **fence,
+                     boolean emit)
+{
+   struct nvc0_fence *f = CALLOC_STRUCT(nvc0_fence);
+
+   *fence = f;
+   if (f == NULL)
+      return FALSE;
+
+   f->screen = screen;
+   pipe_reference_init(&f->reference, 1);
+
+   if (emit)
+      nvc0_fence_emit(f);
+
+   return TRUE;
+}
+
+/* Emit fence into the command stream.
+ *
+ * Assigns the next screen sequence number to the fence, emits a query that
+ * writes that number to the screen's fence buffer object, takes an extra
+ * reference on behalf of the screen's fence list, appends the fence to that
+ * list and marks it EMITTED.  A fence must not be emitted twice.
+ */
+void
+nvc0_fence_emit(struct nvc0_fence *fence)
+{
+   struct nvc0_screen *screen = fence->screen;
+   struct nouveau_channel *chan = screen->base.channel;
+
+   screen->fence.sequence += 1;
+   fence->sequence = screen->fence.sequence;
+
+   assert(!(fence->state & NVC0_FENCE_STATE_EMITTED));
+
+   BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
+   OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
+   OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
+   OUT_RING  (chan, fence->sequence);
+   OUT_RING  (chan, 0x1000f010);
+
+   /* Reference held by the list. */
+   pipe_reference(NULL, &fence->reference);
+
+   /* Append at the tail; an empty list also gets a new head. */
+   if (screen->fence.tail == NULL)
+      screen->fence.head = fence;
+   else
+      screen->fence.tail->next = fence;
+   screen->fence.tail = fence;
+
+   fence->state = NVC0_FENCE_STATE_EMITTED;
+}
+
+/* Free fence.  A fence that is still in the EMITTED state is first
+ * unlinked from its screen's fence list (head and tail are kept
+ * consistent).  If such a fence is unexpectedly not on the list, the list
+ * is left untouched and the fence is simply freed.
+ */
+void
+nvc0_fence_del(struct nvc0_fence *fence)
+{
+   struct nvc0_fence *it;
+   struct nvc0_screen *screen = fence->screen;
+
+   if (fence->state == NVC0_FENCE_STATE_EMITTED) {
+      if (fence == screen->fence.head) {
+         screen->fence.head = fence->next;
+         if (!screen->fence.head)
+            screen->fence.tail = NULL;
+      } else {
+         /* Find the predecessor; `it` ends up NULL if fence is not listed. */
+         for (it = screen->fence.head; it && it->next != fence; it = it->next);
+         if (it) {
+            it->next = fence->next;
+            if (screen->fence.tail == fence)
+               screen->fence.tail = it;
+         }
+      }
+   }
+   FREE(fence);
+}
+
+/* Process fences completed since the last call.
+ *
+ * Reads the current sequence number from screen->fence.map[0]; if it equals
+ * the last acknowledged one there is nothing to do.  Otherwise the fence
+ * list is walked from the head: every fence is marked SIGNALLED, its
+ * trigger callback (if any) is run and the list's reference is dropped,
+ * stopping after the fence whose sequence equals the value read.  The list
+ * head then becomes the first fence not processed.
+ */
+static void
+nvc0_screen_fence_update(struct nvc0_screen *screen)
+{
+   struct nvc0_fence *fence;
+   struct nvc0_fence *next = NULL;
+   uint32_t sequence = screen->fence.map[0];
+
+   if (screen->fence.sequence_ack == sequence)
+      return;
+   screen->fence.sequence_ack = sequence;
+
+   for (fence = screen->fence.head; fence; fence = next) {
+      /* Save what is needed after the unreference below, which may free
+       * the fence; `sequence` is reused as scratch for that.
+       */
+      next = fence->next;
+      sequence = fence->sequence;
+
+      /* Mark SIGNALLED before dropping the reference so nvc0_fence_del()
+       * does not try to unlink the fence from the list being walked.
+       */
+      fence->state = NVC0_FENCE_STATE_SIGNALLED;
+      if (fence->trigger.func)
+         fence->trigger.func(fence->trigger.arg);
+
+      nvc0_fence_reference(&fence, NULL);
+      if (sequence == screen->fence.sequence_ack)
+         break;
+   }
+   screen->fence.head = next;
+   if (!next)
+      screen->fence.tail = NULL;
+}
+
+/* Busy-wait for fence to be signalled.
+ *
+ * Returns TRUE immediately if the fence is not in the EMITTED state.
+ * Otherwise polls the screen's fence state up to 10000 times, yielding the
+ * CPU on every 8th attempt where sched_yield() is available.  Returns TRUE
+ * once the fence is SIGNALLED, FALSE if the attempts run out.
+ */
+boolean
+nvc0_fence_wait(struct nvc0_fence *fence)
+{
+   struct nvc0_screen *screen = fence->screen;
+   int attempt;
+
+   if (fence->state != NVC0_FENCE_STATE_EMITTED)
+      return TRUE;
+
+   for (attempt = 0; attempt < 10000; ++attempt) {
+      nvc0_screen_fence_update(screen);
+
+      if (fence->state == NVC0_FENCE_STATE_SIGNALLED)
+         return TRUE;
+#ifdef PIPE_OS_UNIX
+      if ((attempt & 7) == 7) /* spend a few cycles */
+         sched_yield();
+#endif
+   }
+
+   return FALSE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h
new file mode 100644 (file)
index 0000000..513ac07
--- /dev/null
@@ -0,0 +1,40 @@
+
+#ifndef __NVC0_FENCE_H__
+#define __NVC0_FENCE_H__
+
+#include "util/u_inlines.h"
+
+/* Callback attached to a fence: func(arg) is invoked by the fence update
+ * code when the fence is marked signalled (only if func is non-NULL).
+ * NOTE(review): `next` is not used by the code in nvc0_fence.c - presumably
+ * reserved for chaining several triggers; confirm.
+ */
+struct nvc0_fence_trigger {
+   void (*func)(void *);
+   void *arg;
+   struct nvc0_fence_trigger *next;
+};
+
+/* Values of nvc0_fence::state; 0 (from the zeroing allocation) means the
+ * fence has not been emitted yet.
+ */
+#define NVC0_FENCE_STATE_EMITTED   1
+#define NVC0_FENCE_STATE_SIGNALLED 2
+
+/* reference first, so pipe_reference works directly */
+struct nvc0_fence {
+   struct pipe_reference reference;
+   struct nvc0_fence *next;          /* next fence in the screen's list */
+   struct nvc0_screen *screen;
+   int state;                        /* NVC0_FENCE_STATE_* or 0 */
+   uint32_t sequence;                /* assigned by nvc0_fence_emit() */
+   struct nvc0_fence_trigger trigger;
+};
+
+/* nvc0_fence.c */
+void nvc0_fence_emit(struct nvc0_fence *);
+void nvc0_fence_del(struct nvc0_fence *);
+
+/* Returns TRUE once the fence is signalled, FALSE if polling gave up. */
+boolean nvc0_fence_wait(struct nvc0_fence *);
+
+/* Make *ref point to fence, adjusting reference counts: fence (if not NULL)
+ * gains a reference, the fence previously in *ref (if any) loses one and is
+ * deleted when that was its last.  Either *ref or fence may be NULL.
+ */
+static INLINE void
+nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence)
+{
+   /* Only form member addresses from non-NULL fences; pipe_reference()
+    * accepts NULL for either side.
+    */
+   struct pipe_reference *old_ref = *ref ? &(*ref)->reference : NULL;
+   struct pipe_reference *new_ref = fence ? &fence->reference : NULL;
+
+   if (pipe_reference(old_ref, new_ref))
+      nvc0_fence_del(*ref);
+
+   *ref = fence;
+}
+
+#endif // __NVC0_FENCE_H__
diff --git a/src/gallium/drivers/nvc0/nvc0_formats.c b/src/gallium/drivers/nvc0/nvc0_formats.c
new file mode 100644 (file)
index 0000000..5d02357
--- /dev/null
@@ -0,0 +1,462 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_screen.h"
+#include "nv50_texture.xml.h"
+#include "nvc0_3d.xml.h"
+#include "nv50_defs.xml.h"
+#include "nv50_texture.xml.h"
+#include "pipe/p_defines.h"
+
+/* A_(): tail of a format table entry for formats that carry a vertex
+ * attribute format. Expands to two comma-separated initializers:
+ *  1. the TIC word built from the component swizzle (cr, cg, cb, ca), the
+ *     per-component types (t0..t3) and the texel layout (sz);
+ *  2. the vertex attribute format word: size sz, type t0, and r in bit 31
+ *     (r is 1 only for the B,G,R-ordered formats in the table; presumably a
+ *     red/blue swap flag - confirm against nvc0_3d.xml.h).
+ * The shift is done on an unsigned value because (1 << 31) overflows a
+ * 32-bit signed int.
+ */
+#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r)     \
+   (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) |    \
+   (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) |  \
+   (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) |    \
+   (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) |  \
+   (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) |    \
+   (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) |  \
+   (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) |    \
+   (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) |  \
+   NV50_TIC_0_FMT_##sz,                               \
+   NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##sz |           \
+   NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t0 |           \
+   ((unsigned)(r) << 31)
+
+#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r)     \
+   (NV50_TIC_MAP_##cr << NV50_TIC_0_MAPR__SHIFT) |    \
+   (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) |  \
+   (NV50_TIC_MAP_##cg << NV50_TIC_0_MAPG__SHIFT) |    \
+   (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) |  \
+   (NV50_TIC_MAP_##cb << NV50_TIC_0_MAPB__SHIFT) |    \
+   (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) |  \
+   (NV50_TIC_MAP_##ca << NV50_TIC_0_MAPA__SHIFT) |    \
+   (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) |  \
+   NV50_TIC_0_FMT_##sz, 0 /* same TIC word as A_(), but the second word is 0 and r is ignored */
+
+#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
+#define SAMPLER_VIEW  PIPE_BIND_SAMPLER_VIEW
+#define RENDER_TARGET PIPE_BIND_RENDER_TARGET
+#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL
+#define SCANOUT       PIPE_BIND_SCANOUT
+
+/* for vertex buffers: */
+#define NV50_TIC_0_FMT_8_8_8    NV50_TIC_0_FMT_8_8_8_8
+#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
+#define NV50_TIC_0_FMT_32_32_32 NV50_TIC_0_FMT_32_32_32_32
+
+const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+{
+   /* COMMON FORMATS */
+
+   [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50_SURFACE_FORMAT_A8R8G8B8_UNORM,
+    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+   [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50_SURFACE_FORMAT_X8R8G8B8_UNORM,
+    A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+   [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50_SURFACE_FORMAT_A8R8G8B8_SRGB,
+    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50_SURFACE_FORMAT_X8R8G8B8_SRGB,
+    A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_B5G6R5_UNORM] = { NV50_SURFACE_FORMAT_R5G6B5_UNORM,
+    B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
+    SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+   [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50_SURFACE_FORMAT_A1R5G5B5_UNORM,
+    B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
+    SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+   [PIPE_FORMAT_B4G4R4A4_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+    B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50_SURFACE_FORMAT_A2B10G10R10_UNORM,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
+    SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
+
+   [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50_SURFACE_FORMAT_A2R10G10B10_UNORM,
+    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
+    SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
+
+   /* DEPTH/STENCIL FORMATS */
+
+   [PIPE_FORMAT_Z16_UNORM] = { NV50_ZETA_FORMAT_Z16_UNORM,
+    B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_ZETA, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
+    B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_X8Z24_UNORM,
+    B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8Z24_UNORM,
+    B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT,
+    B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_ZETA, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
+    NV50_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
+    B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   /* LUMINANCE, ALPHA, INTENSITY */
+
+   [PIPE_FORMAT_L8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+    A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_L8_SRGB] = { NV50_SURFACE_FORMAT_R8_UNORM,
+    A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_I8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+    A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_A8_UNORM] = { NV50_SURFACE_FORMAT_A8_UNORM,
+    A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_L8A8_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+    A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_L8A8_SRGB] = { 0,
+    A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+    SAMPLER_VIEW },
+
+   /* DXT, RGTC */
+
+   [PIPE_FORMAT_DXT1_RGB] = { 0,
+    B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_DXT1_RGBA] = { 0,
+    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_DXT3_RGBA] = { 0,
+    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_DXT5_RGBA] = { 0,
+    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_RGTC1_UNORM] = { 0,
+    B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_RGTC1_SNORM] = { 0,
+    B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_RGTC2_UNORM] = { 0,
+    B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_RGTC2_SNORM] = { 0,
+    B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
+    SAMPLER_VIEW },
+
+   /* FLOAT 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16A16_FLOAT,
+    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16B16X16_FLOAT,
+    A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16G16_FLOAT] = { NV50_SURFACE_FORMAT_R16G16_FLOAT,
+    A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16_FLOAT] = { NV50_SURFACE_FORMAT_R16_FLOAT,
+    A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* FLOAT 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32A32_FLOAT,
+    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32B32X32_FLOAT,
+    A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R32G32_FLOAT] = { NV50_SURFACE_FORMAT_R32G32_FLOAT,
+    A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R32_FLOAT] = { NV50_SURFACE_FORMAT_R32_FLOAT,
+    A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* ODD FORMATS */
+
+   [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50_SURFACE_FORMAT_B10G11R11_FLOAT,
+    B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
+    SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
+    B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, E5_9_9_9, 0),
+    SAMPLER_VIEW },
+
+   /* SNORM 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
+    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32B32_SNORM] = { 0,
+    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32_SNORM] = { 0,
+    A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32_SNORM] = { 0,
+    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* UNORM 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32B32_UNORM] = { 0,
+    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32_UNORM] = { 0,
+    A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32_UNORM] = { 0,
+    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* SNORM 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_SNORM,
+    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16G16B16_SNORM] = { 0,
+    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16_SNORM] = { NV50_SURFACE_FORMAT_R16G16_SNORM,
+    A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16_SNORM] = { NV50_SURFACE_FORMAT_R16_SNORM,
+    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* UNORM 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50_SURFACE_FORMAT_R16G16B16A16_UNORM,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16G16B16_UNORM] = { 0,
+    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16_UNORM] = { NV50_SURFACE_FORMAT_R16G16_UNORM,
+    A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16_UNORM] = { NV50_SURFACE_FORMAT_R16_UNORM,
+    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* SNORM 8 */
+
+   [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_SNORM,
+    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8B8_SNORM] = { 0,
+    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8_SNORM] = { NV50_SURFACE_FORMAT_R8G8_SNORM,
+    A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8_SNORM] = { NV50_SURFACE_FORMAT_R8_SNORM,
+    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* UNORM 8 */
+
+   [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50_SURFACE_FORMAT_A8B8G8R8_UNORM,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50_SURFACE_FORMAT_A8B8G8R8_SRGB,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+    SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8B8_UNORM] = { NV50_SURFACE_FORMAT_X8B8G8R8_UNORM,
+    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8B8_SRGB] = { NV50_SURFACE_FORMAT_X8B8G8R8_SRGB,
+    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+    SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8_UNORM] = { NV50_SURFACE_FORMAT_R8G8_UNORM,
+    A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8_UNORM] = { NV50_SURFACE_FORMAT_R8_UNORM,
+    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* SSCALED 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_SINT,
+    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
+    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32_SSCALED] = { NV50_SURFACE_FORMAT_R32G32_SINT,
+    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32_SSCALED] = { 0,
+    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* USCALED 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_USCALED] = { NV50_SURFACE_FORMAT_R32G32B32A32_UINT,
+    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32B32_USCALED] = { 0,
+    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32_USCALED] = { NV50_SURFACE_FORMAT_R32G32_UINT,
+    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32_USCALED] = { 0,
+    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* SSCALED 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_SINT,
+    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
+    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16_SSCALED] = { NV50_SURFACE_FORMAT_R16G16_SINT,
+    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16_SSCALED] = { NV50_SURFACE_FORMAT_R16_SINT,
+    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* USCALED 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_USCALED] = { NV50_SURFACE_FORMAT_R16G16B16A16_UINT,
+    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16B16_USCALED] = { 0,
+    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16_USCALED] = { NV50_SURFACE_FORMAT_R16G16_UINT,
+    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16_USCALED] = { NV50_SURFACE_FORMAT_R16_UINT,
+    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* SSCALED 8 */
+
+   [PIPE_FORMAT_R8G8B8A8_SSCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_SINT,
+    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
+    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8_SSCALED] = { NV50_SURFACE_FORMAT_R8G8_SINT,
+    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8_SSCALED] = { NV50_SURFACE_FORMAT_R8_SINT,
+    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* USCALED 8 */
+
+   [PIPE_FORMAT_R8G8B8A8_USCALED] = { NV50_SURFACE_FORMAT_A8B8G8R8_UINT,
+    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8B8_USCALED] = { 0,
+    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8_USCALED] = { NV50_SURFACE_FORMAT_R8G8_UINT,
+    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8_USCALED] = { NV50_SURFACE_FORMAT_R8_UINT,
+    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+};
diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
new file mode 100644 (file)
index 0000000..b8b89de
--- /dev/null
@@ -0,0 +1,220 @@
+
+#ifndef __NVC0_PGRAPH_MACROS_H__
+#define __NVC0_PGRAPH_MACROS_H__
+
+static const uint32_t nvc0_9097_blend_enables[] = /* per the annotations: sends bits 0..7 of $r1, one bit each, to methods from 0x1360 on */
+{
+   0x05360021, /* maddr [0x1360, increment = 0x4] */
+   0x00404042, /* sendbf $r1 0 0x1 */
+   0x00424042, /* sendbf $r1 1 0x1 */
+   0x00444042, /* sendbf $r1 2 0x1 */
+   0x00464042, /* sendbf $r1 3 0x1 */
+   0x00484042, /* sendbf $r1 4 0x1 */
+   0x004a4042, /* sendbf $r1 5 0x1 */
+   0x004c4042, /* sendbf $r1 6 0x1 */
+   0x004e4042, /* sendbf $r1 7 0x1 */
+   0x01534215, /* read $r2 [0x1534] */
+   0x03428021, /* maddr [0x3428] */
+   0x00000841, /* send $r1 */
+   0x00d9c021, /* maddr [0x0d9c] */
+   0x00014807, /* braz $r1 0x5 */
+   0x00000311, /* mov $r3 0 */
+   0x00009027, /* braz annul $r2 0x2 */
+   0x0342c315, /* read $r3 [0x342c] */
+   0x00000011, /* mov $r0 0 */
+   0x000018c1, /* exit send $r3 */
+   0x00000011  /* mov $r0 0, delay slot */
+};
+
+/*
+ * if (limit == 0) {
+ *    LIMIT = 0;
+ *    START = 1;
+ * } else {
+ *    LIMIT = limit - 1;
+ *    START = start;
+ * }
+ */
+static const uint32_t nvc0_9097_vertex_array_select[] = /* 22 macro words, not disassembled here; intended logic is the pseudo-code in the comment above */
+{
+   0x00000301,
+   0x00000401,
+   0x00000701,
+   0x00131d10,
+   0x0001a807,
+   0x00000601,
+   0x00004211,
+   0x0004a410,
+   0x0000c007,
+   0x00061b10,
+   0x00004611,
+   0x11004512,
+   0x01c06d11,
+   0x00004211,
+   0x6180a822,
+   0x0201c042,
+   0x00003041,
+   0x09004512,
+   0x01f02d11,
+   0x6180a822,
+   0x0200c0c2,
+   0x00002041
+};
+
+static const uint32_t nvc0_9097_color_mask_brdc[] = /* sends $r1 eight times after a single maddr */
+{
+   0x05a00021, /* maddr [0x1680] */
+   0x00000841, /* send $r1 */
+   0x00000841, /* send $r1 */
+   0x00000841, /* send $r1 */
+   0x00000841, /* send $r1 */
+   0x00000841, /* send $r1 */
+   0x00000841, /* send $r1 */
+   0x000008c1, /* exit send $r1 */
+   0x00000841, /* send $r1 (presumably the delay slot of the exit, cf. blend_enables) */
+};
+
+static const uint32_t nvc0_9097_poly_mode_front[] = /* annotations transcribed from the identical words in nvc0_9097_gp_select / tep_select; 0x01, 0x03, 0x04 decoded by the same pattern */
+{
+   0x03410615, /* 0x00: read $r6 [0x3410] */
+   0x00db0515, /* 0x01: read $r5 [NVC0_3D_POLYGON_MODE_BACK] */
+   0x22018312, /* 0x02: mov $r3 extrinsrt 0 $r6 0 0x8 0x4 */
+   0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+   0x00dac021, /* 0x04: maddr [NVC0_3D_POLYGON_MODE_FRONT] */
+   0x00000841, /* 0x05: send $r1 */
+   0x06c04211, /* 0x06: mov $r2 GL_POLYGON_MODE_LINE */
+   0x0004aa10, /* 0x07: mov $r2 sub $r5 $r2 */
+   0x00009037, /* 0x08: branz annul $r2 0xa */
+   0x22118312, /* 0x09: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
+   0x020c0515, /* 0x0a: read $r5 [NVC0_3D_SP_SELECT(3)] */
+   0x06c04211, /* 0x0b: mov $r2 GL_POLYGON_MODE_LINE */
+   0x00048a10, /* 0x0c: mov $r2 sub $r1 $r2 */
+   0x00009037, /* 0x0d: branz annul $r2 0xf */
+   0x22118312, /* 0x0e: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
+   0x007f8612, /* 0x0f: mov $r6 extrinsrt 0 $r6 0x1f 0x1 0 */
+   0x0000f037, /* 0x10: branz annul $r6 0x13 */
+   0x01a24021, /* 0x11: maddr [0x1a24] */
+   0x1cd4c041, /* 0x12: send 0x7353 */
+   0x00104211, /* 0x13: mov $r2 0x41 */
+   0x0004a210, /* 0x14: mov $r2 sub $r4 $r2 */
+   0x00009037, /* 0x15: branz annul $r2 0x17 */
+   0x00000311, /* 0x16: mov $r3 0 */
+   0x000c4211, /* 0x17: mov $r2 0x31 */
+   0x0004aa10, /* 0x18: mov $r2 sub $r5 $r2 */
+   0x00009037, /* 0x19: branz annul $r2 0x1b */
+   0x00000311, /* 0x1a: mov $r3 0 */
+   0x002ec0a1, /* 0x1b: exit maddr [0x02ec] */
+   0x00001841  /* 0x1c: send $r3 */
+};
+
+static const uint32_t nvc0_9097_poly_mode_back[] = /* annotations transcribed from the identical words in nvc0_9097_gp_select / tep_select; 0x03, 0x04 decoded by the same pattern */
+{
+   0x03410615, /* 0x00: read $r6 [0x3410] */
+   0x00dac515, /* 0x01: read $r5 [NVC0_3D_POLYGON_MODE_FRONT] */
+   0x22018312, /* 0x02: mov $r3 extrinsrt 0 $r6 0 0x8 0x4 */
+   0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+   0x00db0021, /* 0x04: maddr [NVC0_3D_POLYGON_MODE_BACK] */
+   0x00000841, /* 0x05: send $r1 */
+   0x06c04211, /* 0x06: mov $r2 GL_POLYGON_MODE_LINE */
+   0x0004aa10, /* 0x07: mov $r2 sub $r5 $r2 */
+   0x00009037, /* 0x08: branz annul $r2 0xa */
+   0x22118312, /* 0x09: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
+   0x020c0515, /* 0x0a: read $r5 [NVC0_3D_SP_SELECT(3)] */
+   0x06c04211, /* 0x0b: mov $r2 GL_POLYGON_MODE_LINE */
+   0x00048a10, /* 0x0c: mov $r2 sub $r1 $r2 */
+   0x00009037, /* 0x0d: branz annul $r2 0xf */
+   0x22118312, /* 0x0e: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
+   0x007f8612, /* 0x0f: mov $r6 extrinsrt 0 $r6 0x1f 0x1 0 */
+   0x0000f037, /* 0x10: branz annul $r6 0x13 */
+   0x01a24021, /* 0x11: maddr [0x1a24] */
+   0x1cd4c041, /* 0x12: send 0x7353 */
+   0x00104211, /* 0x13: mov $r2 0x41 */
+   0x0004a210, /* 0x14: mov $r2 sub $r4 $r2 */
+   0x00009037, /* 0x15: branz annul $r2 0x17 */
+   0x00000311, /* 0x16: mov $r3 0 */
+   0x000c4211, /* 0x17: mov $r2 0x31 */
+   0x0004aa10, /* 0x18: mov $r2 sub $r5 $r2 */
+   0x00009037, /* 0x19: branz annul $r2 0x1b */
+   0x00000311, /* 0x1a: mov $r3 0 */
+   0x002ec0a1, /* 0x1b: exit maddr [0x02ec] */
+   0x00001841  /* 0x1c: send $r3 */
+};
+
+static const uint32_t nvc0_9097_gp_select[] = /* 0x0f - presumably the macro index this program is bound to; confirm at the upload site */
+{
+   0x03410615, /* 0x00: read $r6 [0x3410] */
+   0x00dac515, /* 0x01: read $r5 [NVC0_3D_POLYGON_MODE_FRONT] */
+   0x22018312, /* 0x02: mov $r3 extrinsrt 0 $r6 0 0x8 0x4 */
+   0x00db0415, /* 0x03: read $r4 [NVC0_3D_POLYGON_MODE_BACK] */
+   0x02100021, /* 0x04: maddr [NVC0_3D_SP_SELECT(4)] */
+   0x00000841, /* 0x05: send $r1 */
+   0x06c04211, /* 0x06: mov $r2 GL_POLYGON_MODE_LINE */
+   0x0004aa10, /* 0x07: mov $r2 sub $r5 $r2 */
+   0x00009037, /* 0x08: branz annul $r2 0xa */
+   0x22118312, /* 0x09: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
+   0x020c0515, /* 0x0a: read $r5 [NVC0_3D_SP_SELECT(3)] */
+   0x06c04211, /* 0x0b: mov $r2 GL_POLYGON_MODE_LINE */
+   0x0004a210, /* 0x0c: mov $r2 sub $r4 $r2 */
+   0x00009037, /* 0x0d: branz annul $r2 0xf */
+   0x22118312, /* 0x0e: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
+   0x007f8612, /* 0x0f: mov $r6 extrinsrt 0 $r6 0x1f 0x1 0 */
+   0x0000f037, /* 0x10: branz annul $r6 0x13 */
+   0x01a24021, /* 0x11: maddr [0x1a24] */
+   0x1cd4c041, /* 0x12: send 0x7353 */
+   0x00104211, /* 0x13: mov $r2 0x41 */
+   0x00048a10, /* 0x14: mov $r2 sub $r1 $r2 */
+   0x00009037, /* 0x15: branz annul $r2 0x17 */
+   0x00000311, /* 0x16: mov $r3 0 */
+   0x000c4211, /* 0x17: mov $r2 0x31 */
+   0x0004aa10, /* 0x18: mov $r2 sub $r5 $r2 */
+   0x00009037, /* 0x19: branz annul $r2 0x1b */
+   0x00000311, /* 0x1a: mov $r3 0 */
+   0x002ec0a1, /* 0x1b: exit maddr [0x02ec] */
+   0x00001841  /* 0x1c: send $r3 */
+};
+
+static const uint32_t nvc0_9097_tep_select[] = /* 0x10 - presumably the macro index this program is bound to; confirm at the upload site */
+{
+   0x03410615, /* 0x00: read $r6 [0x3410] */
+   0x00dac515, /* 0x01: read $r5 [NVC0_3D_POLYGON_MODE_FRONT] */
+   0x22018312, /* 0x02: mov $r3 extrinsrt 0 $r6 0 0x8 0x4 */
+   0x00db0415, /* 0x03: read $r4 [NVC0_3D_POLYGON_MODE_BACK] */
+   0x020c0021, /* 0x04: maddr [NVC0_3D_SP_SELECT(3), increment = 0] */
+   0x00000841, /* 0x05: send $r1 */
+   0x06c04211, /* 0x06: mov $r2 GL_POLYGON_MODE_LINE */
+   0x0004aa10, /* 0x07: mov $r2 sub $r5 $r2 */
+   0x00009037, /* 0x08: branz annul $r2 0xa */
+   0x22118312, /* 0x09: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
+   0x02100515, /* 0x0a: read $r5 [NVC0_3D_SP_SELECT(4)] */
+   0x06c04211, /* 0x0b: mov $r2 GL_POLYGON_MODE_LINE */
+   0x0004a210, /* 0x0c: mov $r2 sub $r4 $r2 */
+   0x00009037, /* 0x0d: branz annul $r2 0xf */
+   0x22118312, /* 0x0e: mov $r3 extrinsrt 0 $r6 0x8 0x8 0x4 */
+   0x007f8612, /* 0x0f: mov $r6 extrinsrt 0 $r6 0x1f 0x1 0 */
+   0x0000f037, /* 0x10: branz annul $r6 0x13 */
+   0x01a24021, /* 0x11: maddr [0x1a24] */
+   0x1cd4c041, /* 0x12: send 0x7353 */
+   0x00104211, /* 0x13: mov $r2 0x41 */
+   0x0004aa10, /* 0x14: mov $r2 sub $r5 $r2 */
+   0x00009037, /* 0x15: branz annul $r2 0x17 */
+   0x00000311, /* 0x16: mov $r3 0 */
+   0x000c4211, /* 0x17: mov $r2 0x31 */
+   0x00048a10, /* 0x18: mov $r2 sub $r1 $r2 */
+   0x00035037, /* 0x19: branz annul $r2 0x26 */
+   0x00000311, /* 0x1a: mov $r3 0 */
+   0x03430415, /* 0x1b: read $r4 [0x3430] */
+   0x00450512, /* 0x1c: mov $r5 extrinsrt 0 $r4 0x2 0x1 0 */
+   0x00004211, /* 0x1d: mov $r2 0x1 */
+   0x0004aa10, /* 0x1e: mov $r2 sub $r5 $r2 */
+   0x00019037, /* 0x1f: branz annul $r2 0x25 */
+   0x0003c211, /* 0x20: mov $r2 0xf */
+   0x01a2c021, /* 0x21: maddr [0x1a2c] */
+   0xffffd211, /* 0x22: mov $r2 add $r2 -0x1 */
+   0xffff9017, /* 0x23: branz $r2 0x21 (presumably: offset field decodes to -2 like the other branches; comment used to read "add $r2" - confirm) */
+   0x00000041, /* 0x24: send 0 */
+   0x00000011, /* 0x25: nop */
+   0x002ec0a1, /* 0x26: exit maddr [0x02ec] */
+   0x00001841  /* 0x27: send $r3 */
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nvc0/nvc0_m2mf.xml.h
new file mode 100644 (file)
index 0000000..3bf628d
--- /dev/null
@@ -0,0 +1,138 @@
+#ifndef NVC0_M2MF_XML
+#define NVC0_M2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_m2mf.xml  (   2227 bytes, from 2010-10-16 16:10:29)
+- copyright.xml  (   6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml  (  11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml (   2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml    (   4437 bytes, from 2010-07-06 07:43:58)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_M2MF_TILING_MODE_IN                               0x00000204
+
+#define NVC0_M2MF_TILING_PITCH_IN                              0x00000208
+
+#define NVC0_M2MF_TILING_HEIGHT_IN                             0x0000020c
+
+#define NVC0_M2MF_TILING_DEPTH_IN                              0x00000210
+
+#define NVC0_M2MF_TILING_POSITION_IN_Z                         0x00000214
+
+#define NVC0_M2MF_TILING_MODE_OUT                              0x00000220
+
+#define NVC0_M2MF_TILING_PITCH_OUT                             0x00000224
+
+#define NVC0_M2MF_TILING_HEIGHT_OUT                            0x00000228
+
+#define NVC0_M2MF_TILING_DEPTH_OUT                             0x0000022c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Z                                0x00000230
+
+#define NVC0_M2MF_OFFSET_OUT_HIGH                              0x00000238
+
+#define NVC0_M2MF_OFFSET_OUT_LOW                               0x0000023c
+
+#define NVC0_M2MF_EXEC                                         0x00000300
+#define NVC0_M2MF_EXEC_PUSH                                    0x00000001
+#define NVC0_M2MF_EXEC_LINEAR_IN                               0x00000010
+#define NVC0_M2MF_EXEC_LINEAR_OUT                              0x00000100
+#define NVC0_M2MF_EXEC_NOTIFY                                  0x00002000
+#define NVC0_M2MF_EXEC_INC__MASK                               0x00f00000
+#define NVC0_M2MF_EXEC_INC__SHIFT                              20
+
+#define NVC0_M2MF_DATA                                         0x00000304
+
+#define NVC0_M2MF_OFFSET_IN_HIGH                               0x0000030c
+
+#define NVC0_M2MF_OFFSET_IN_LOW                                        0x00000310
+
+#define NVC0_M2MF_PITCH_IN                                     0x00000314
+
+#define NVC0_M2MF_PITCH_OUT                                    0x00000318
+
+#define NVC0_M2MF_LINE_LENGTH_IN                               0x0000031c
+
+#define NVC0_M2MF_LINE_COUNT                                   0x00000320
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH                          0x0000032c
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_LOW                           0x00000330
+
+#define NVC0_M2MF_NOTIFY                                       0x00000334
+
+#define NVC0_M2MF_TILING_POSITION_IN_X                         0x00000344
+
+#define NVC0_M2MF_TILING_POSITION_IN_Y                         0x00000348
+
+#define NVC0_M2MF_TILING_POSITION_OUT_X                                0x0000034c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Y                                0x00000350
+
+
+#endif /* NVC0_M2MF_XML */
diff --git a/src/gallium/drivers/nvc0/nvc0_miptree.c b/src/gallium/drivers/nvc0/nvc0_miptree.c
new file mode 100644 (file)
index 0000000..aac09d7
--- /dev/null
@@ -0,0 +1,327 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+#include "nvc0_transfer.h"
+
+/* Pick the tile mode for a surface from its size.
+ *
+ * The 0x0X0 nibble is selected from @ny and the 0xX00 nibble from @nz;
+ * @nx is not used. When nz == 1 only the height nibble is returned.
+ * Otherwise the height nibble is capped at 0x020 before the depth nibble
+ * is OR'ed in.
+ */
+static INLINE uint32_t
+get_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+{
+   uint32_t h_sel;
+   uint32_t d_sel;
+
+   if (ny > 64)
+      h_sel = 0x040; /* height 128 tiles */
+   else if (ny > 32)
+      h_sel = 0x030; /* height 64 tiles */
+   else if (ny > 16)
+      h_sel = 0x020; /* height 32 tiles */
+   else if (ny > 8)
+      h_sel = 0x010; /* height 16 tiles */
+   else
+      h_sel = 0x000;
+
+   if (nz == 1)
+      return h_sel;
+
+   /* surfaces with depth never use a height selector above 0x020 */
+   if (h_sel > 0x020)
+      h_sel = 0x020;
+
+   if (nz > 16 && h_sel < 0x020)
+      d_sel = 0x500; /* depth 32 tiles */
+   else if (nz > 8)
+      d_sel = 0x400; /* depth 16 tiles */
+   else if (nz > 4)
+      d_sel = 0x300; /* depth 8 tiles */
+   else if (nz > 2)
+      d_sel = 0x200; /* depth 4 tiles */
+   else
+      d_sel = 0x100;
+
+   return h_sel | d_sel;
+}
+
+/* Return the byte offset of depth slice @z relative to the start of a
+ * mip level that uses @tile_mode, has row pitch @pitch and is @nbh block
+ * rows high.
+ */
+static INLINE unsigned
+get_zslice_offset(uint32_t tile_mode, unsigned z, unsigned pitch, unsigned nbh)
+{
+   const unsigned th = NVC0_TILE_H(tile_mode);
+   const unsigned td = NVC0_TILE_D(tile_mode);
+
+   /* distance to the next slice within one volume tile */
+   const unsigned slice_stride = th * 64;
+   /* size (in bytes) of one volume tile */
+   const unsigned volume_stride = td * align(nbh, th) * pitch;
+
+   const unsigned slice_in_tile = z % td;
+   const unsigned tile_index = z / td;
+
+   return slice_in_tile * slice_stride + tile_index * volume_stride;
+}
+
+/* Destroy a miptree: free every level's image offset table, release the
+ * buffer object, then free the miptree structure itself.
+ */
+static void
+nvc0_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
+{
+   struct nvc0_miptree *mt = nvc0_miptree(pt);
+   unsigned level = 0;
+
+   while (level <= pt->last_level) {
+      FREE(mt->level[level].image_offset);
+      ++level;
+   }
+
+   nouveau_screen_bo_release(pscreen, mt->base.bo);
+
+   FREE(mt);
+}
+
+/* Export the miptree's buffer object through @whandle.
+ *
+ * The stride passed along is computed from the base level's format and
+ * width. Returns FALSE if there is no miptree or no buffer object.
+ */
+static boolean
+nvc0_miptree_get_handle(struct pipe_screen *pscreen,
+                        struct pipe_resource *pt,
+                        struct winsys_handle *whandle)
+{
+   struct nvc0_miptree *mt = nvc0_miptree(pt);
+
+   if (mt && mt->base.bo) {
+      const unsigned stride = util_format_get_stride(mt->base.base.format,
+                                                     mt->base.base.width0);
+
+      return nouveau_screen_bo_get_handle(pscreen, mt->base.bo,
+                                          stride, whandle);
+   }
+
+   return FALSE;
+}
+
+/* Resource vtable installed on every miptree (see mt->base.vtbl).
+ * Entries are positional; the trailing comment names the slot each one
+ * fills. is_resource_referenced is left unimplemented (NULL).
+ */
+const struct u_resource_vtbl nvc0_miptree_vtbl =
+{
+   nvc0_miptree_get_handle,         /* get_handle */
+   nvc0_miptree_destroy,            /* resource_destroy */
+   NULL,                            /* is_resource_referenced */
+   nvc0_miptree_transfer_new,       /* get_transfer */
+   nvc0_miptree_transfer_del,       /* transfer_destroy */
+   nvc0_miptree_transfer_map,        /* transfer_map */
+   u_default_transfer_flush_region, /* transfer_flush_region */
+   nvc0_miptree_transfer_unmap,     /* transfer_unmap */
+   u_default_transfer_inline_write  /* transfer_inline_write */
+};
+
+/* Create a miptree resource described by @templ and back it with a newly
+ * allocated tiled VRAM buffer object.
+ *
+ * For every mip level the pitch and tile mode are computed first; then the
+ * byte offset of every (image, level) pair is laid out and the total size
+ * accumulated. Cube maps get 6 images, everything else 1.
+ *
+ * Returns the new resource, or NULL if a host allocation or the buffer
+ * object allocation fails (everything allocated so far is freed).
+ */
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *templ)
+{
+   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+   struct nvc0_miptree *mt = CALLOC_STRUCT(nvc0_miptree);
+   struct pipe_resource *pt;
+   int ret, i;
+   unsigned w, h, d, l, image_alignment, alloc_size;
+   uint32_t tile_flags;
+
+   if (!mt)
+      return NULL;
+
+   /* only form the member address once mt is known to be valid */
+   pt = &mt->base.base;
+
+   mt->base.vtbl = &nvc0_miptree_vtbl;
+   *pt = *templ;
+   pipe_reference_init(&pt->reference, 1);
+   pt->screen = pscreen;
+
+   w = pt->width0;
+   h = pt->height0;
+   d = pt->depth0;
+
+   /* In each case only the LAST assignment takes effect; the earlier ones
+    * are kept as a record of the alternative storage types.
+    */
+   switch (pt->format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      tile_flags = 0x070; /* COMPRESSED */
+      tile_flags = 0x020; /* NORMAL ? */
+      tile_flags = 0x010; /* NORMAL ? */
+      break;
+   case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+      tile_flags = 0x530; /* MSAA 4, COMPRESSED */
+      tile_flags = 0x460; /* NORMAL */
+      break;
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+      tile_flags = 0x110; /* NORMAL */
+      if (w * h >= 128 * 128 && 0)
+         tile_flags = 0x170; /* COMPRESSED, requires magic */
+      break;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      tile_flags = 0xf50; /* COMPRESSED */
+      tile_flags = 0xf70; /* MSAA 2 */
+      tile_flags = 0xf90; /* MSAA 4 */
+      tile_flags = 0xfe0; /* NORMAL */
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+      tile_flags = 0xce0; /* COMPRESSED */
+      tile_flags = 0xcf0; /* MSAA 2, COMPRESSED */
+      tile_flags = 0xd00; /* MSAA 4, COMPRESSED */
+      tile_flags = 0xc30; /* NORMAL */
+      break;
+   case PIPE_FORMAT_R16G16B16A16_UNORM:
+      tile_flags = 0xe90; /* COMPRESSED */
+      break;
+   default:
+      tile_flags = 0xe00; /* MSAA 4, COMPRESSED 32 BIT */
+      tile_flags = 0xfe0; /* NORMAL 32 BIT */
+      if (w * h >= 128 * 128 && 0)
+         tile_flags = 0xdb0; /* COMPRESSED 32 BIT, requires magic */
+      break;
+   }
+
+   /* XXX: texture arrays */
+   mt->image_nr = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
+
+   /* pass 1: per-level pitch, tile mode and offset table */
+   for (l = 0; l <= pt->last_level; l++) {
+      struct nvc0_miptree_level *lvl = &mt->level[l];
+      unsigned nby = util_format_get_nblocksy(pt->format, h);
+
+      lvl->image_offset = CALLOC(mt->image_nr, sizeof(lvl->image_offset[0]));
+      if (!lvl->image_offset)
+         goto fail;
+      lvl->pitch = align(util_format_get_stride(pt->format, w), 64);
+      lvl->tile_mode = get_tile_dims(w, nby, d);
+
+      w = u_minify(w, 1);
+      h = u_minify(h, 1);
+      d = u_minify(d, 1);
+   }
+
+   image_alignment  = NVC0_TILE_H(mt->level[0].tile_mode) * 64;
+   image_alignment *= NVC0_TILE_D(mt->level[0].tile_mode);
+
+   /* NOTE the distinction between arrays of mip-mapped 2D textures and
+    * mip-mapped 3D textures. We can't use image_nr == depth for 3D mip.
+    */
+   /* pass 2: lay out every level of every image back to back */
+   for (i = 0; i < mt->image_nr; i++) {
+      for (l = 0; l <= pt->last_level; l++) {
+         struct nvc0_miptree_level *lvl = &mt->level[l];
+         int size;
+         unsigned tile_h = NVC0_TILE_H(lvl->tile_mode);
+         unsigned tile_d = NVC0_TILE_D(lvl->tile_mode);
+
+         h = u_minify(pt->height0, l);
+         d = u_minify(pt->depth0, l);
+
+         size  = lvl->pitch;
+         size *= align(util_format_get_nblocksy(pt->format, h), tile_h);
+         size *= align(d, tile_d);
+
+         lvl->image_offset[i] = mt->total_size;
+
+         mt->total_size += size;
+      }
+      mt->total_size = align(mt->total_size, image_alignment);
+   }
+
+   alloc_size = mt->total_size;
+   if (tile_flags == 0x170)
+      alloc_size *= 3; /* HiZ, XXX: correct size */
+
+   ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, alloc_size,
+                             mt->level[0].tile_mode, tile_flags,
+                             &mt->base.bo);
+   if (ret)
+      goto fail;
+
+   return pt;
+
+fail:
+   /* mt was zero-initialized, so levels that never got an offset table
+    * still hold NULL here.
+    */
+   for (l = 0; l <= pt->last_level; ++l)
+      FREE(mt->level[l].image_offset);
+   FREE(mt);
+   return NULL;
+}
+
+/* Wrap an existing buffer object, identified by @whandle, in a miptree
+ * resource described by @templ.
+ *
+ * Returns the new resource, or NULL if the template is not supported, the
+ * handle cannot be imported, or a host allocation fails.
+ */
+struct pipe_resource *
+nvc0_miptree_from_handle(struct pipe_screen *pscreen,
+                         const struct pipe_resource *templ,
+                         struct winsys_handle *whandle)
+{
+   struct nvc0_miptree *mt;
+   unsigned stride;
+
+   /* only supports 2D, non-mip mapped textures for the moment */
+   if ((templ->target != PIPE_TEXTURE_2D &&
+        templ->target != PIPE_TEXTURE_RECT) ||
+       templ->last_level != 0 ||
+       templ->depth0 != 1)
+      return NULL;
+
+   mt = CALLOC_STRUCT(nvc0_miptree);
+   if (!mt)
+      return NULL;
+
+   mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+   if (mt->base.bo == NULL) {
+      FREE(mt);
+      return NULL;
+   }
+
+   /* single image at offset 0 (the table is zero-initialized) */
+   mt->level[0].image_offset = CALLOC(1, sizeof(mt->level[0].image_offset[0]));
+   if (!mt->level[0].image_offset) {
+      /* give the imported bo back, as nvc0_miptree_destroy would */
+      nouveau_screen_bo_release(pscreen, mt->base.bo);
+      FREE(mt);
+      return NULL;
+   }
+
+   mt->base.base = *templ;
+   mt->base.vtbl = &nvc0_miptree_vtbl;
+   pipe_reference_init(&mt->base.base.reference, 1);
+   mt->base.base.screen = pscreen;
+   mt->image_nr = 1;
+   mt->level[0].pitch = stride;
+   mt->level[0].tile_mode = mt->base.bo->tile_mode;
+
+   /* no need to adjust bo reference count */
+   return &mt->base.base;
+}
+
+
+/* Surface functions.
+ */
+
+/* Create a surface for one face / mip level / depth slice of miptree @pt.
+ *
+ * The surface takes a reference on @pt. Its offset is the level's image
+ * offset (indexed by @face for cube maps, 0 otherwise), plus the slice
+ * offset for 3D textures. Returns NULL if the allocation fails.
+ */
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
+                         unsigned face, unsigned level, unsigned zslice,
+                         unsigned flags)
+{
+   struct nvc0_miptree *mt = nvc0_miptree(pt);
+   struct nvc0_miptree_level *lvl = &mt->level[level];
+   const unsigned img = (pt->target == PIPE_TEXTURE_CUBE) ? face : 0;
+   struct pipe_surface *ps = CALLOC_STRUCT(pipe_surface);
+
+   if (!ps)
+      return NULL;
+
+   pipe_reference_init(&ps->reference, 1);
+   pipe_resource_reference(&ps->texture, pt);
+
+   ps->format = pt->format;
+   ps->usage = flags;
+   ps->width = u_minify(pt->width0, level);
+   ps->height = u_minify(pt->height0, level);
+
+   ps->face = face;
+   ps->level = level;
+   ps->zslice = zslice;
+
+   ps->offset = lvl->image_offset[img];
+   if (pt->target == PIPE_TEXTURE_3D) {
+      const unsigned nbh = util_format_get_nblocksy(pt->format, ps->height);
+
+      ps->offset += get_zslice_offset(lvl->tile_mode, zslice, lvl->pitch,
+                                      nbh);
+   }
+
+   return ps;
+}
+
+/* Destroy a surface: drop its reference on the texture and free it. */
+void
+nvc0_miptree_surface_del(struct pipe_surface *ps)
+{
+   struct nvc0_surface *surf = nvc0_surface(ps);
+
+   /* passing NULL unreferences the currently held texture */
+   pipe_resource_reference(&ps->texture, NULL);
+   FREE(surf);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
new file mode 100644 (file)
index 0000000..e38f6ce
--- /dev/null
@@ -0,0 +1,685 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define NOUVEAU_DEBUG 1
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+/* Return whether source slot @s of @nvi may take its operand directly from
+ * the instruction @ld instead of from @ld's result.
+ *
+ * Decided from the per-opcode nvc0_op_info_table entry of @nvi:
+ *  - if @ld is a MOV of an immediate, @s must be 0 or 1 and bit @s of the
+ *    entry's 'immediate' mask must be set;
+ *  - otherwise bit @s of the entry's 'memory' mask must be set.
+ * In both cases @ld must not use indirect addressing and @nvi must not
+ * already have an immediate among its first three sources.
+ */
+boolean
+nvc0_insn_can_load(struct nv_instruction *nvi, int s,
+                   struct nv_instruction *ld)
+{
+   int i;
+
+   if (ld->opcode == NV_OP_MOV && ld->src[0]->value->reg.file == NV_FILE_IMM) {
+      if (s > 1 || !(nvc0_op_info_table[nvi->opcode].immediate & (1 << s)))
+         return FALSE;
+      /* Without bit 2 (value 4) in the mask, the low 12 bits of the
+       * immediate must be zero.
+       * NOTE(review): presumably bit 2 marks opcodes that can encode a full
+       * 32 bit immediate -- confirm against the op info table.
+       */
+      if (!(nvc0_op_info_table[nvi->opcode].immediate & 4))
+         if (ld->src[0]->value->reg.imm.u32 & 0xfff)
+            return FALSE;
+   } else
+   if (!(nvc0_op_info_table[nvi->opcode].memory & (1 << s)))
+      return FALSE;
+
+   /* a non-negative 'indirect' index means the load is indirect */
+   if (ld->indirect >= 0)
+      return FALSE;
+
+   /* refuse if one of the first three sources is already an immediate */
+   for (i = 0; i < 3 && nvi->src[i]; ++i)
+      if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
+         return FALSE;
+
+   return TRUE;
+}
+
+/* Tell whether @nvi can be made conditional: its opcode must allow it, it
+ * must not already carry a predicate, and none of its (up to four leading)
+ * sources may be an immediate.
+ */
+boolean
+nvc0_insn_is_predicateable(struct nv_instruction *nvi)
+{
+   int s = 0;
+
+   if (!nv_op_predicateable(nvi->opcode) || nvi->predicate >= 0)
+      return FALSE;
+
+   while (s < 4 && nvi->src[s]) {
+      if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
+         return FALSE;
+      ++s;
+   }
+
+   return TRUE;
+}
+
+/* Return the sum of the reference counts of all values defined by @nvi.
+ * Definitions are scanned from slot 0 up to the first empty slot (at most
+ * five slots).
+ */
+int
+nvc0_insn_refcount(struct nv_instruction *nvi)
+{
+   int rc = 0;
+   int i;
+
+   /* the loop condition already stops at the first NULL definition */
+   for (i = 0; i < 5 && nvi->def[i]; ++i)
+      rc += nvi->def[i]->refc;
+
+   return rc;
+}
+
+/* Redirect every reference in @pc that currently points at @old_val so
+ * that it points at @new_val instead.
+ *
+ * Returns the number of references that were redirected. If both values
+ * are the same nothing is changed and old_val's reference count is
+ * returned.
+ */
+int
+nvc0_pc_replace_value(struct nv_pc *pc,
+                     struct nv_value *old_val,
+                     struct nv_value *new_val)
+{
+   int i, n, s;
+
+   if (old_val == new_val)
+      return old_val->refc;
+
+   for (i = 0, n = 0; i < pc->num_refs; ++i) {
+      if (pc->refs[i]->value == old_val) {
+         ++n;
+         /* find which source slot of the owning instruction holds this ref */
+         for (s = 0; s < 6 && pc->refs[i]->insn->src[s]; ++s)
+            if (pc->refs[i]->insn->src[s] == pc->refs[i])
+               break;
+         /* NOTE(review): the search also stops at the first empty slot, in
+          * which case s < 6 still holds and the assert does not fire --
+          * presumably a ref is always found in its instruction; confirm.
+          */
+         assert(s < 6);
+         nv_reference(pc, pc->refs[i]->insn, s, new_val);
+      }
+   }
+   return n;
+}
+
+/* Follow @ref back through any chain of MOV instructions and return the
+ * originating value if it is a constant, i.e. either an immediate or the
+ * result of an LD whose source lies in one of the files
+ * NV_FILE_MEM_C(0) .. NV_FILE_MEM_C(15). Returns NULL otherwise, or if
+ * @ref is NULL.
+ */
+struct nv_value *
+nvc0_pc_find_constant(struct nv_ref *ref)
+{
+   struct nv_value *src;
+
+   if (!ref)
+      return NULL;
+
+   src = ref->value;
+   /* skip over MOVs; their source is asserted to carry no modifier */
+   while (src->insn && src->insn->opcode == NV_OP_MOV) {
+      assert(!src->insn->src[0]->mod);
+      src = src->insn->src[0]->value;
+   }
+   if ((src->reg.file == NV_FILE_IMM) ||
+       (src->insn &&
+        src->insn->opcode == NV_OP_LD &&
+        src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
+        src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
+      return src;
+   return NULL;
+}
+
+/* Like nvc0_pc_find_constant(), but only accept values in the immediate
+ * file; returns NULL for anything else.
+ */
+struct nv_value *
+nvc0_pc_find_immediate(struct nv_ref *ref)
+{
+   struct nv_value *val = nvc0_pc_find_constant(ref);
+
+   if (!val || val->reg.file != NV_FILE_IMM)
+      return NULL;
+
+   return val;
+}
+
+/* Free the reference storage of @pc: every 64th entry of pc->refs is
+ * freed, then the pointer array itself.
+ * NOTE(review): presumably refs are allocated in chunks of 64 so that only
+ * the first entry of each chunk is an allocation base -- confirm against
+ * the allocator.
+ */
+static void
+nv_pc_free_refs(struct nv_pc *pc)
+{
+   int i;
+   for (i = 0; i < pc->num_refs; i += 64)
+      FREE(pc->refs[i]);
+   FREE(pc->refs);
+}
+
+/* Map a CFG edge kind to a short printable name ("?" if unknown). */
+static const char *
+edge_name(ubyte type)
+{
+   const char *name = "?";
+
+   switch (type) {
+   case CFG_EDGE_FORWARD:
+      name = "forward";
+      break;
+   case CFG_EDGE_BACK:
+      name = "back";
+      break;
+   case CFG_EDGE_LOOP_ENTER:
+      name = "loop";
+      break;
+   case CFG_EDGE_LOOP_LEAVE:
+      name = "break";
+      break;
+   case CFG_EDGE_FAKE:
+      name = "fake";
+      break;
+   default:
+      break;
+   }
+
+   return name;
+}
+
+/* Call @f(@priv, block) once for each basic block reachable from @root.
+ *
+ * Blocks are taken from a work stack (bb). A successor behind a forward or
+ * fake edge is pushed only once all of its incoming edges have been seen
+ * (counted in its 'priv' field, which is reset to 0 when the block is
+ * popped). Loop-enter successors are pushed immediately, back edges are
+ * ignored, and loop-leave successors are parked on a second stack (bbb)
+ * that is only moved to the work stack once the latter has run empty.
+ *
+ * Both stacks have a fixed size; overflowing them is caught by assertions.
+ */
+void
+nvc0_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f,
+                      void *priv)
+{
+   struct nv_basic_block *bb[64], *bbb[16], *b;
+   const int bb_max = (int)(sizeof(bb) / sizeof(bb[0]));
+   const int bbb_max = (int)(sizeof(bbb) / sizeof(bbb[0]));
+   int j, p, pp;
+
+   bb[0] = root;
+   p = 1;
+   pp = 0;
+
+   while (p > 0) {
+      b = bb[--p];
+      b->priv = 0;
+
+      /* out[1] is handled first, so out[0] ends up on top of the stack */
+      for (j = 1; j >= 0; --j) {
+         if (!b->out[j])
+            continue;
+
+         switch (b->out_kind[j]) {
+         case CFG_EDGE_BACK:
+            continue;
+         case CFG_EDGE_FORWARD:
+         case CFG_EDGE_FAKE:
+            if (++b->out[j]->priv == b->out[j]->num_in) {
+               assert(p < bb_max);
+               bb[p++] = b->out[j];
+            }
+            break;
+         case CFG_EDGE_LOOP_ENTER:
+            assert(p < bb_max);
+            bb[p++] = b->out[j];
+            break;
+         case CFG_EDGE_LOOP_LEAVE:
+            assert(pp < bbb_max);
+            bbb[pp++] = b->out[j];
+            break;
+         default:
+            assert(0);
+            break;
+         }
+      }
+
+      f(priv, b);
+
+      /* work stack empty: continue with the parked loop exits */
+      if (!p) {
+         p = pp;
+         for (; pp > 0; --pp)
+            bb[pp - 1] = bbb[pp - 1];
+      }
+   }
+}
+
+/* Pass callback: print the header of basic block @b (id plus the kind and
+ * target of each outgoing edge) followed by all of its instructions,
+ * starting with the PHIs if there are any. @priv is unused.
+ */
+static void
+nv_do_print_function(void *priv, struct nv_basic_block *b)
+{
+   struct nv_instruction *insn;
+   int j;
+
+   debug_printf("=== BB %i ", b->id);
+   for (j = 0; j < 2; ++j) {
+      if (b->out[j])
+         debug_printf("[%s -> %i] ", edge_name(b->out_kind[j]),
+                      b->out[j]->id);
+   }
+   debug_printf("===\n");
+
+   for (insn = b->phi ? b->phi : b->entry; insn; insn = insn->next)
+      nvc0_print_instruction(insn);
+}
+
+/* Print the function whose entry block is @root: a "MAIN" or
+ * "SUBROUTINE n" title line, then every block in pass order.
+ */
+void
+nvc0_print_function(struct nv_basic_block *root)
+{
+   if (!root->subroutine)
+      debug_printf("MAIN\n");
+   else
+      debug_printf("SUBROUTINE %i\n", root->subroutine);
+
+   nvc0_pc_pass_in_order(root, nv_do_print_function, root);
+}
+
+/* Print all functions of @pc: root slots 0 .. num_subroutines, skipping
+ * empty ones.
+ */
+void
+nvc0_print_program(struct nv_pc *pc)
+{
+   int i = 0;
+
+   while (i < pc->num_subroutines + 1) {
+      struct nv_basic_block *entry = pc->root[i];
+
+      if (entry)
+         nvc0_print_function(entry);
+      ++i;
+   }
+}
+
+#if NOUVEAU_DEBUG > 1
+/* Write block @b as a box node to @f, then one "a -> b" line per outgoing
+ * edge (coloured or dotted according to the edge kind), recursing into
+ * every successor that has not yet been stamped with the current
+ * pc->pass_seq. Back edges are printed but not followed.
+ */
+static void
+nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
+{
+   int i;
+
+   /* mark as visited for this pass */
+   b->pass_seq = pc->pass_seq;
+
+   fprintf(f, "\t%i [shape=box]\n", b->id);
+
+   for (i = 0; i < 2; ++i) {
+      if (!b->out[i])
+         continue;
+      switch (b->out_kind[i]) {
+      case CFG_EDGE_FORWARD:
+         fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
+         break;
+      case CFG_EDGE_LOOP_ENTER:
+         fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
+         break;
+      case CFG_EDGE_LOOP_LEAVE:
+         fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
+         break;
+      case CFG_EDGE_BACK:
+         fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
+         /* 'continue' skips the recursion below */
+         continue;
+      case CFG_EDGE_FAKE:
+         fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
+         break;
+      default:
+         assert(0);
+         break;
+      }
+      if (b->out[i]->pass_seq < pc->pass_seq)
+         nv_do_print_cfgraph(pc, f, b->out[i]);
+   }
+}
+
+/* Append the control flow graph of subroutine @subr (0 == MAIN) to the
+ * file @filepath as a "digraph G { ... }" section. Does nothing if the
+ * file cannot be opened.
+ */
+static void
+nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
+{
+   FILE *out = fopen(filepath, "a");
+
+   if (out) {
+      fprintf(out, "digraph G {\n");
+
+      /* new pass number, so that no block counts as already visited */
+      ++pc->pass_seq;
+      nv_do_print_cfgraph(pc, out, pc->root[subr]);
+
+      fprintf(out, "}\n");
+      fclose(out);
+   }
+}
+#endif
+
+/* Dump the emitted code of @pc as hexadecimal 32 bit words, two words per
+ * loop iteration, with a line break after every 16 words.
+ */
+static INLINE void
+nvc0_pc_print_binary(struct nv_pc *pc)
+{
+   unsigned i;
+
+   NOUVEAU_DBG("nvc0_pc_print_binary(%u ops)\n", pc->emit_size / 8);
+
+   for (i = 0; i < pc->emit_size / 4; i += 2) {
+      debug_printf("0x%08x ", pc->emit[i + 0]);
+      debug_printf("0x%08x ", pc->emit[i + 1]);
+      /* i is always even, so test the index of the word just printed;
+       * testing i itself against 15 would never match.
+       */
+      if (((i + 1) % 16) == 15)
+         debug_printf("\n");
+   }
+   debug_printf("\n");
+}
+
+/* Encode every instruction of every basic block in pc->bb_list order into
+ * the pc->emit buffer, advancing by two 32 bit words per instruction, then
+ * append two more words and grow pc->emit_size by 8 accordingly.
+ * pc->emit is restored to the start of the buffer before returning.
+ * Always returns 0.
+ */
+static int
+nvc0_emit_program(struct nv_pc *pc)
+{
+   uint32_t *code = pc->emit;
+   int n;
+
+   NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
+
+   for (n = 0; n < pc->num_blocks; ++n) {
+      struct nv_instruction *i;
+      struct nv_basic_block *b = pc->bb_list[n];
+
+      for (i = b->entry; i; i = i->next) {
+         nvc0_emit_instruction(pc, i);
+         pc->emit += 2;
+         pc->emit_pos += 2;
+      }
+   }
+   /* the amount written must match the size computed beforehand */
+   assert(pc->emit == &code[pc->emit_size / 4]);
+
+   /* NOTE(review): presumably the encoding of a final EXIT instruction --
+    * confirm against the ISA documentation.
+    */
+   pc->emit[0] = 0x00001de7;
+   pc->emit[1] = 0x80000000;
+   pc->emit_size += 8;
+
+   pc->emit = code;
+
+#ifdef NOUVEAU_DEBUG
+   nvc0_pc_print_binary(pc);
+#else
+   debug_printf("not printing binary\n");
+#endif
+   return 0;
+}
+
+int
+nvc0_generate_code(struct nvc0_translation_info *ti)
+{
+   struct nv_pc *pc;
+   int ret;
+   int i;
+
+   pc = CALLOC_STRUCT(nv_pc);
+   if (!pc)
+      return 1;
+
+   pc->is_fragprog = ti->prog->type == PIPE_SHADER_FRAGMENT;
+
+   pc->root = CALLOC(ti->num_subrs + 1, sizeof(pc->root[0]));
+   if (!pc->root) {
+      FREE(pc);
+      return 1;
+   }
+   pc->num_subroutines = ti->num_subrs;
+
+   ret = nvc0_tgsi_to_nc(pc, ti);
+   if (ret)
+      goto out;
+#if NOUVEAU_DEBUG > 1
+   nvc0_print_program(pc);
+#endif
+
+   pc->opt_reload_elim = ti->require_stores ? FALSE : TRUE;
+
+   /* optimization */
+   ret = nvc0_pc_exec_pass0(pc);
+   if (ret)
+      goto out;
+#ifdef NOUVEAU_DEBUG
+   nvc0_print_program(pc);
+#endif
+
+   /* register allocation */
+   ret = nvc0_pc_exec_pass1(pc);
+   if (ret)
+      goto out;
+#if NOUVEAU_DEBUG > 1
+   nv_print_program(pc);
+   nv_print_cfgraph(pc, "nvc0_shader_cfgraph.dot", 0);
+#endif
+
+   /* prepare for emission */
+   ret = nvc0_pc_exec_pass2(pc);
+   if (ret)
+      goto out;
+   assert(!(pc->emit_size % 8));
+
+   pc->emit = CALLOC(pc->emit_size / 4 + 2, 4);
+   if (!pc->emit) {
+      ret = 3;
+      goto out;
+   }
+   ret = nvc0_emit_program(pc);
+   if (ret)
+      goto out;
+
+   ti->prog->code = pc->emit;
+   ti->prog->code_base = 0;
+   ti->prog->code_size = pc->emit_size;
+   ti->prog->parm_size = 0;
+
+   ti->prog->max_gpr = MAX2(4, pc->max_reg[NV_FILE_GPR] + 1);
+
+   ti->prog->relocs = pc->reloc_entries;
+   ti->prog->num_relocs = pc->num_relocs;
+
+   NOUVEAU_DBG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+
+out:
+   nv_pc_free_refs(pc);
+
+   for (i = 0; i < pc->num_blocks; ++i)
+      FREE(pc->bb_list[i]);
+   if (pc->root)
+      FREE(pc->root);
+   if (ret) {
+      /* on success, these will be referenced by struct nvc0_program */
+      if (pc->emit)
+         FREE(pc->emit);
+      if (pc->immd_buf)
+         FREE(pc->immd_buf);
+      if (pc->reloc_entries)
+         FREE(pc->reloc_entries);
+   }
+   FREE(pc);
+   return ret;
+}
+
+/* Link PHI instruction @i into block @b.
+ *
+ * Three cases are distinguished:
+ *  - @b has no PHI yet: @i becomes b->phi and is put in front of b->entry
+ *    (or becomes entry and exit of an empty block);
+ *  - @b has PHIs only (b->entry is itself a PHI): @i is appended after it
+ *    and becomes both b->entry and b->exit;
+ *  - otherwise @i is inserted directly before b->entry, i.e. after the
+ *    existing PHIs.
+ * The caller is responsible for i->bb and the instruction count (see
+ * nvc0_insn_append).
+ */
+static void
+nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
+{
+   if (!b->phi) {
+      i->prev = NULL;
+      b->phi = i;
+      i->next = b->entry;
+      if (b->entry) {
+         assert(!b->entry->prev && b->exit);
+         b->entry->prev = i;
+      } else {
+         b->entry = i;
+         b->exit = i;
+      }
+   } else {
+      assert(b->entry);
+      if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
+         assert(b->entry == b->exit);
+         /* i->next is not written here; it keeps whatever the caller set */
+         b->entry->next = i;
+         i->prev = b->entry;
+         b->entry = i;
+         b->exit = i;
+      } else { /* insert before entry */
+         assert(b->entry->prev && b->exit);
+         i->next = b->entry;
+         i->prev = b->entry->prev;
+         b->entry->prev = i;
+         i->prev->next = i;
+      }
+   }
+}
+
+/* Add instruction @i to block @b: PHIs go through nvbb_insert_phi(), all
+ * other instructions are linked in after the current exit. b->entry is
+ * moved to @i if the block had no entry yet or if the previous tail was a
+ * PHI. Also sets i->bb and bumps the block's instruction count.
+ */
+void
+nvc0_insn_append(struct nv_basic_block *b, struct nv_instruction *i)
+{
+   if (i->opcode == NV_OP_PHI) {
+      nvbb_insert_phi(b, i);
+   } else {
+      struct nv_instruction *tail = b->exit;
+
+      i->prev = tail;
+      if (tail)
+         tail->next = i;
+      b->exit = i;
+
+      if (!b->entry || (tail && tail->opcode == NV_OP_PHI))
+         b->entry = i;
+   }
+
+   b->num_instructions++;
+   i->bb = b;
+}
+
+/* Insert instruction @ni directly after @at in @at's basic block.
+ *
+ * If @at is the last instruction this is an append. In both cases @ni is
+ * assigned to @at's block and the block's instruction count is increased,
+ * matching what nvc0_insn_append() does.
+ */
+void
+nvc0_insn_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
+{
+   if (!at->next) {
+      nvc0_insn_append(at->bb, ni);
+      return;
+   }
+   ni->next = at->next;
+   ni->prev = at;
+   ni->next->prev = ni;
+   ni->prev->next = ni;
+
+   /* keep the block bookkeeping identical to the append path */
+   ni->bb = at->bb;
+   ni->bb->num_instructions++;
+}
+
+/* Remove instruction @nvi from its basic block: release all of its source
+ * references, unlink it from the instruction list and fix up the block's
+ * exit, entry and phi pointers where they pointed at @nvi.
+ * The instruction itself is not freed here.
+ * NOTE(review): b->num_instructions is not decremented -- confirm whether
+ * that counter is meant to track live instructions.
+ */
+void
+nvc0_insn_delete(struct nv_instruction *nvi)
+{
+   struct nv_basic_block *b = nvi->bb;
+   int s;
+
+   /* debug_printf("REM: "); nv_print_instruction(nvi); */
+
+   /* drop the references held by the sources */
+   for (s = 0; s < 6 && nvi->src[s]; ++s)
+      nv_reference(NULL, nvi, s, NULL);
+
+   if (nvi->next)
+      nvi->next->prev = nvi->prev;
+   else {
+      assert(nvi == b->exit);
+      b->exit = nvi->prev;
+   }
+
+   if (nvi->prev)
+      nvi->prev->next = nvi->next;
+
+   if (nvi == b->entry) {
+      /* PHIs don't get hooked to b->entry */
+      b->entry = nvi->next;
+      assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
+   }
+
+   if (nvi == b->phi) {
+      if (nvi->opcode != NV_OP_PHI)
+         NOUVEAU_DBG("NOTE: b->phi points to non-PHI instruction\n");
+
+      assert(!nvi->prev);
+      /* advance to the next PHI, if there is one */
+      if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
+         b->phi = NULL;
+      else
+         b->phi = nvi->next;
+   }
+}
+
+/* Swap two adjacent non-PHI instructions, where @i2 must directly follow
+ * @i1. Afterwards @i2 precedes @i1; the block's entry and exit pointers
+ * are updated if they referred to either of them.
+ */
+void
+nvc0_insns_permute(struct nv_instruction *i1, struct nv_instruction *i2)
+{
+   struct nv_basic_block *b = i1->bb;
+
+   assert(i1->opcode != NV_OP_PHI &&
+          i2->opcode != NV_OP_PHI);
+   assert(i1->next == i2);
+
+   if (b->exit == i2)
+      b->exit = i1;
+
+   if (b->entry == i1)
+      b->entry = i2;
+
+   /* relink the pair itself ... */
+   i2->prev = i1->prev;
+   i1->next = i2->next;
+   i2->next = i1;
+   i1->prev = i2;
+
+   /* ... and then the outer neighbours, if any */
+   if (i2->prev)
+      i2->prev->next = i2;
+   if (i1->next)
+      i1->next->prev = i1;
+}
+
+/* Add a CFG edge of kind @edge_kind from @parent to @b. The edge takes the
+ * first free outgoing slot of @parent (at most two) and is appended to the
+ * incoming edge list of @b (fewer than 8 entries before the call).
+ */
+void
+nvc0_bblock_attach(struct nv_basic_block *parent,
+                  struct nv_basic_block *b, ubyte edge_kind)
+{
+   const int slot = parent->out[0] ? 1 : 0;
+
+   assert(b->num_in < 8);
+   if (slot) {
+      assert(!parent->out[1]);
+   }
+
+   parent->out[slot] = b;
+   parent->out_kind[slot] = edge_kind;
+
+   b->in[b->num_in] = parent;
+   b->in_kind[b->num_in] = edge_kind;
+   b->num_in++;
+}
+
+/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
+
+/* Return TRUE if block @b is dominated by block @d.
+ *
+ * A block is dominated by itself. Otherwise every predecessor reached via
+ * a non-back edge must itself be dominated by @d (checked recursively); a
+ * block without any incoming edge is not dominated.
+ */
+boolean
+nvc0_bblock_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
+{
+   int j;
+
+   if (b == d)
+      return TRUE;
+
+   for (j = 0; j < b->num_in; ++j) {
+      if (b->in_kind[j] == CFG_EDGE_BACK)
+         continue;
+      if (!nvc0_bblock_dominated_by(b->in[j], d))
+         return FALSE;
+   }
+
+   /* j equals the number of incoming edges here */
+   return j ? TRUE : FALSE;
+}
+
+/* Check if @bf (future) can be reached from @bp (past), stop at @bt.
+ *
+ * Breadth-first search over outgoing edges that are not "wall" edges
+ * (IS_WALL_EDGE). The 'priv' field of a block serves as the visited mark
+ * and is reset to 0 for every queued block before returning.
+ */
+boolean
+nvc0_bblock_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
+                        struct nv_basic_block *bt)
+{
+   struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b = NULL;
+   int i, p, n;
+
+   p = 0;
+   n = 1;
+   q[0] = bp;
+
+   while (p < n) {
+      b = q[p++];
+
+      if (b == bf)
+         break;
+      if (b == bt)
+         continue;
+      /* up to two successors are queued below; tie the check to the
+       * actual queue size instead of a hard-coded number
+       */
+      assert(n <= (NV_PC_MAX_BASIC_BLOCKS - 2));
+
+      for (i = 0; i < 2; ++i) {
+         if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
+            q[n] = b->out[i];
+            q[n++]->priv = 1;
+         }
+      }
+   }
+   /* clear the visited marks again */
+   for (--n; n >= 0; --n)
+      q[n]->priv = 0;
+
+   return (b == bf);
+}
+
+/* Depth-first search, starting at @df, for a block on the dominance
+ * frontier of @b: a block that is not itself dominated by @b but has a
+ * predecessor (via a non-back edge) that is. Back edges are not followed.
+ * Returns the first such block found, or NULL.
+ */
+static struct nv_basic_block *
+nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
+{
+   struct nv_basic_block *out;
+   int i;
+
+   if (!nvc0_bblock_dominated_by(df, b)) {
+      for (i = 0; i < df->num_in; ++i) {
+         if (df->in_kind[i] == CFG_EDGE_BACK)
+            continue;
+         if (nvc0_bblock_dominated_by(df->in[i], b))
+            return df;
+      }
+   }
+   /* not a frontier block itself: try its successors */
+   for (i = 0; i < 2 && df->out[i]; ++i) {
+      if (df->out_kind[i] == CFG_EDGE_BACK)
+         continue;
+      if ((out = nvbb_find_dom_frontier(b, df->out[i])))
+         return out;
+   }
+   return NULL;
+}
+
+/* Return a block on the dominance frontier of @b, searching from each of
+ * its successors in turn, or NULL if none is found.
+ */
+struct nv_basic_block *
+nvc0_bblock_dom_frontier(struct nv_basic_block *b)
+{
+   int i = 0;
+
+   while (i < 2 && b->out[i]) {
+      struct nv_basic_block *df = nvbb_find_dom_frontier(b, b->out[i]);
+
+      if (df)
+         return df;
+      ++i;
+   }
+
+   return NULL;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
new file mode 100644 (file)
index 0000000..b48b0b1
--- /dev/null
@@ -0,0 +1,648 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NVC0_COMPILER_H__
+#define __NVC0_COMPILER_H__
+
+#include <stdio.h>
+
+#ifndef NOUVEAU_DBG
+#ifdef NOUVEAU_DEBUG
+# define NOUVEAU_DBG(args...) debug_printf(args);
+#else
+# define NOUVEAU_DBG(args...)
+#endif
+#endif
+
+#ifndef NOUVEAU_ERR
+#define NOUVEAU_ERR(fmt, args...) \
+   fprintf(stderr, "%s:%d -  "fmt, __FUNCTION__, __LINE__, ##args);
+#endif
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+
+/* pseudo opcodes */
+#define NV_OP_UNDEF      0
+#define NV_OP_BIND       1
+#define NV_OP_MERGE      2
+#define NV_OP_PHI        3
+#define NV_OP_SELECT     4
+#define NV_OP_NOP        5
+
+/**
+ * BIND forces source operand i into the same register as destination operand i
+ * SELECT forces its multiple source operands and its destination operand into
+ *  one and the same register.
+ */
+
+/* base opcodes */
+#define NV_OP_LD         6
+#define NV_OP_ST         7
+#define NV_OP_MOV        8
+#define NV_OP_AND        9
+#define NV_OP_OR        10
+#define NV_OP_XOR       11
+#define NV_OP_SHL       12
+#define NV_OP_SHR       13
+#define NV_OP_NOT       14
+#define NV_OP_SET       15
+#define NV_OP_ADD       16
+#define NV_OP_SUB       17
+#define NV_OP_MUL       18
+#define NV_OP_MAD       19
+#define NV_OP_ABS       20
+#define NV_OP_NEG       21
+#define NV_OP_MAX       22
+#define NV_OP_MIN       23
+#define NV_OP_CVT       24
+#define NV_OP_CEIL      25
+#define NV_OP_FLOOR     26
+#define NV_OP_TRUNC     27
+#define NV_OP_SAD       28
+
+/* shader opcodes */
+#define NV_OP_VFETCH    29
+#define NV_OP_PFETCH    30
+#define NV_OP_EXPORT    31
+#define NV_OP_LINTERP   32
+#define NV_OP_PINTERP   33
+#define NV_OP_EMIT      34
+#define NV_OP_RESTART   35
+#define NV_OP_TEX       36
+#define NV_OP_TXB       37
+#define NV_OP_TXL       38
+#define NV_OP_TXF       39
+#define NV_OP_TXQ       40
+#define NV_OP_QUADOP    41
+#define NV_OP_DFDX      42
+#define NV_OP_DFDY      43
+#define NV_OP_KIL       44
+
+/* control flow opcodes */
+#define NV_OP_BRA       45
+#define NV_OP_CALL      46
+#define NV_OP_RET       47
+#define NV_OP_EXIT      48
+#define NV_OP_BREAK     49
+#define NV_OP_BREAKADDR 50
+#define NV_OP_JOINAT    51
+#define NV_OP_JOIN      52
+
+/* typed opcodes */
+#define NV_OP_ADD_F32   NV_OP_ADD
+#define NV_OP_ADD_B32   53
+#define NV_OP_MUL_F32   NV_OP_MUL
+#define NV_OP_MUL_B32   54
+#define NV_OP_ABS_F32   NV_OP_ABS
+#define NV_OP_ABS_S32   55
+#define NV_OP_NEG_F32   NV_OP_NEG
+#define NV_OP_NEG_S32   56
+#define NV_OP_MAX_F32   NV_OP_MAX
+#define NV_OP_MAX_S32   57
+#define NV_OP_MAX_U32   58
+#define NV_OP_MIN_F32   NV_OP_MIN
+#define NV_OP_MIN_S32   59
+#define NV_OP_MIN_U32   60
+#define NV_OP_SET_F32   61
+#define NV_OP_SET_S32   62
+#define NV_OP_SET_U32   63
+#define NV_OP_SAR       64
+#define NV_OP_RCP       65
+#define NV_OP_RSQ       66
+#define NV_OP_LG2       67
+#define NV_OP_SIN       68
+#define NV_OP_COS       69
+#define NV_OP_EX2       70
+#define NV_OP_PRESIN    71
+#define NV_OP_PREEX2    72
+#define NV_OP_SAT       73
+
+/* newly added opcodes */
+#define NV_OP_SET_F32_AND 74
+#define NV_OP_SET_F32_OR  75
+#define NV_OP_SET_F32_XOR 76
+#define NV_OP_SELP        77
+#define NV_OP_SLCT        78
+#define NV_OP_SLCT_F32    NV_OP_SLCT
+#define NV_OP_SLCT_S32    79
+#define NV_OP_SLCT_U32    80
+#define NV_OP_SUB_F32     NV_OP_SUB
+#define NV_OP_SUB_S32     81
+#define NV_OP_MAD_F32     NV_OP_MAD
+#define NV_OP_FSET_F32    82
+
+#define NV_OP_COUNT     83
+
+/* nv50 files omitted */
+#define NV_FILE_GPR      0
+#define NV_FILE_COND     1
+#define NV_FILE_PRED     2
+#define NV_FILE_IMM      16
+#define NV_FILE_MEM_S    32
+#define NV_FILE_MEM_V    34
+#define NV_FILE_MEM_A    35
+#define NV_FILE_MEM_L    48
+#define NV_FILE_MEM_G    64
+#define NV_FILE_MEM_C(i) (80 + (i)) /* (i) parenthesized for expression args */
+
+#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S)
+
+#define NV_MOD_NEG 1
+#define NV_MOD_ABS 2
+#define NV_MOD_NOT 4
+#define NV_MOD_SAT 8
+
+#define NV_TYPE_U8  0x00
+#define NV_TYPE_S8  0x01
+#define NV_TYPE_U16 0x02
+#define NV_TYPE_S16 0x03
+#define NV_TYPE_U32 0x04
+#define NV_TYPE_S32 0x05
+#define NV_TYPE_P32 0x07
+#define NV_TYPE_F32 0x09
+#define NV_TYPE_F64 0x0b
+#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4)) /* count in upper nibble */
+#define NV_TYPE_ANY 0xff
+
+#define NV_TYPE_ISINT(t) ((t) < 7)
+#define NV_TYPE_ISSGD(t) ((t) & 1)
+
+#define NV_CC_FL 0x0
+#define NV_CC_LT 0x1
+#define NV_CC_EQ 0x2
+#define NV_CC_LE 0x3
+#define NV_CC_GT 0x4
+#define NV_CC_NE 0x5
+#define NV_CC_GE 0x6
+#define NV_CC_U  0x8
+#define NV_CC_TR 0xf
+#define NV_CC_O  0x10
+#define NV_CC_C  0x11
+#define NV_CC_A  0x12
+#define NV_CC_S  0x13
+
+#define NV_PC_MAX_INSTRUCTIONS 2048
+#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
+
+#define NV_PC_MAX_BASIC_BLOCKS 1024
+
+struct nv_op_info {
+   uint base;                /* e.g. ADD_S32 -> ADD */
+   char name[12];
+   uint8_t type;
+   uint8_t mods;
+   unsigned flow        : 1;
+   unsigned commutative : 1;
+   unsigned vector      : 1;
+   unsigned predicate   : 1;
+   unsigned pseudo      : 1;
+   unsigned immediate   : 3;
+   unsigned memory      : 3;
+};
+
+extern struct nv_op_info nvc0_op_info_table[];
+
+#define NV_BASEOP(op) (nvc0_op_info_table[op].base)
+#define NV_OPTYPE(op) (nvc0_op_info_table[op].type)
+
+static INLINE uint
+nv_op_base(uint opcode)
+{
+   return nvc0_op_info_table[opcode].base;
+}
+
+static INLINE boolean
+nv_is_texture_op(uint opcode)
+{
+   return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ);
+}
+
+static INLINE boolean
+nv_is_vector_op(uint opcode)
+{
+   return nvc0_op_info_table[opcode].vector ? TRUE : FALSE;
+}
+
+static INLINE boolean
+nv_op_commutative(uint opcode)
+{
+   return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE;
+}
+
+static INLINE uint8_t
+nv_op_supported_src_mods(uint opcode)
+{
+   return nvc0_op_info_table[opcode].mods;
+}
+
+static INLINE boolean
+nv_op_predicateable(uint opcode)
+{
+   return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE;
+}
+
+static INLINE uint
+nv_type_order(ubyte type)
+{
+   switch (type & 0xf) {
+   case NV_TYPE_U8:
+   case NV_TYPE_S8:
+      return 0;
+   case NV_TYPE_U16:
+   case NV_TYPE_S16:
+      return 1;
+   case NV_TYPE_U32:
+   case NV_TYPE_F32:
+   case NV_TYPE_S32:
+   case NV_TYPE_P32:
+      return 2;
+   case NV_TYPE_F64:
+      return 3;
+   }
+   assert(0);
+   return 0;
+}
+
+/* Size of @type: the base element size (1 << nv_type_order()), multiplied by
+ * the count stored in the upper nibble when that nibble is non-zero.
+ */
+static INLINE uint
+nv_type_sizeof(ubyte type)
+{
+   const uint base = 1 << nv_type_order(type);
+   const uint count = type >> 4;
+
+   return count ? base * count : base;
+}
+
+static INLINE uint
+nv_type_sizeof_base(ubyte type)
+{
+   return 1 << nv_type_order(type);
+}
+
+struct nv_reg {
+   uint32_t address; /* for memory locations */
+   int id; /* for registers */
+   ubyte file;
+   ubyte size;
+   union {
+      int32_t s32;
+      int64_t s64;
+      uint64_t u64;
+      uint32_t u32;
+      float f32;
+      double f64;
+   } imm;
+};
+
+struct nv_range {
+   struct nv_range *next;
+   int bgn;
+   int end;
+};
+
+struct nv_ref;
+
+struct nv_value {
+   struct nv_reg reg; 
+   struct nv_instruction *insn;
+   struct nv_value *join;
+   struct nv_ref *last_use;
+   int n;
+   struct nv_range *livei;
+   int refc;
+   struct nv_value *next;
+   struct nv_value *prev;
+};
+
+struct nv_ref {
+   struct nv_value *value;
+   struct nv_instruction *insn;
+   struct list_head list; /* connects uses of the same value */
+   uint8_t mod;
+   uint8_t flags;
+};
+
+struct nv_basic_block;
+
+struct nv_instruction {
+   struct nv_instruction *next;
+   struct nv_instruction *prev;
+   uint opcode;
+   uint serial;
+
+   struct nv_value *def[5];
+   struct nv_ref *src[6];
+
+   int8_t predicate; /* index of predicate src */
+   int8_t indirect;  /* index of pointer src */
+
+   union {
+      struct {
+         uint8_t t; /* TIC binding */
+         uint8_t s; /* TSC binding */
+      } tex;
+      struct {
+         uint8_t d; /* output type */
+         uint8_t s; /* input type */
+      } cvt;
+   } ext;
+
+   struct nv_basic_block *bb;
+   struct nv_basic_block *target; /* target block of control flow insn */
+
+   unsigned cc         : 5; /* condition code */
+   unsigned fixed      : 1; /* don't optimize away (prematurely) */
+   unsigned terminator : 1;
+   unsigned join       : 1;
+   unsigned set_cond   : 4; /* 2nd byte */
+   unsigned saturate   : 1;
+   unsigned centroid   : 1;
+   unsigned flat       : 1;
+   unsigned patch      : 1;
+   unsigned lanes      : 4; /* 3rd byte */
+   unsigned tex_argc   : 3;
+   unsigned tex_live   : 1;
+   unsigned tex_cube   : 1; /* 4th byte */
+   unsigned tex_mask   : 4;
+
+   uint8_t quadop;
+};
+
+/* Count the leading non-NULL entries of nvi->def[] (at most 5). */
+static INLINE int
+nvi_vector_size(struct nv_instruction *nvi)
+{
+   int count = 0;
+
+   assert(nvi);
+   while (count < 5 && nvi->def[count])
+      ++count;
+   return count;
+}
+
+#define CFG_EDGE_FORWARD     0
+#define CFG_EDGE_BACK        1
+#define CFG_EDGE_LOOP_ENTER  2
+#define CFG_EDGE_LOOP_LEAVE  4
+#define CFG_EDGE_FAKE        8
+
+/* 'WALL' edge means where reachability check doesn't follow */
+/* 'LOOP' edge means just having to do with loops */
+#define IS_LOOP_EDGE(k) ((k) & 7)
+#define IS_WALL_EDGE(k) ((k) & 9)
+
+struct nv_basic_block {
+   struct nv_instruction *entry; /* first non-phi instruction */
+   struct nv_instruction *exit;
+   struct nv_instruction *phi; /* very first instruction */
+   int num_instructions;
+
+   struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
+   struct nv_basic_block *in[8]; /* hope that suffices */
+   uint num_in;
+   ubyte out_kind[2];
+   ubyte in_kind[8];
+
+   int id;
+   int subroutine;
+   uint priv; /* reset to 0 after you're done */
+   uint pass_seq;
+
+   uint32_t emit_pos; /* position, size in emitted code */
+   uint32_t emit_size;
+
+   uint32_t live_set[NV_PC_MAX_VALUES / 32];
+};
+
+struct nvc0_translation_info;
+
+struct nv_pc {
+   struct nv_basic_block **root;
+   struct nv_basic_block *current_block;
+   struct nv_basic_block *parent_block;
+
+   int loop_nesting_bound;
+   uint pass_seq;
+
+   struct nv_value values[NV_PC_MAX_VALUES];
+   struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
+   struct nv_ref **refs;
+   struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];
+   int num_values;
+   int num_instructions;
+   int num_refs;
+   int num_blocks;
+   int num_subroutines;
+
+   int max_reg[4];
+
+   uint32_t *immd_buf; /* populated on emit */
+   unsigned immd_count;
+
+   uint32_t *emit;
+   unsigned emit_size;
+   unsigned emit_pos;
+
+   void *reloc_entries;
+   unsigned num_relocs;
+
+   /* optimization enables */
+   boolean opt_reload_elim;
+   boolean is_fragprog;
+};
+
+void nvc0_insn_append(struct nv_basic_block *, struct nv_instruction *);
+void nvc0_insn_insert_after(struct nv_instruction *, struct nv_instruction *);
+
+/* Take the next unused entry of pc->instructions and set it up as @opcode
+ * with condition code 0 and no predicate or indirect source (index -1).
+ * The instruction is not linked into any basic block here.
+ * NOTE(review): no other fields are written, so this presumably relies on
+ * the nv_pc storage being zero-initialized - confirm against its allocation.
+ */
+static INLINE struct nv_instruction *
+nv_alloc_instruction(struct nv_pc *pc, uint opcode)
+{
+   struct nv_instruction *insn;
+
+   insn = &pc->instructions[pc->num_instructions++];
+   /* checked after the increment, so handing out the last array slot
+    * already trips the assert */
+   assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
+
+   insn->opcode = opcode;
+   insn->cc = 0;
+   insn->indirect = -1;
+   insn->predicate = -1;
+
+   return insn;
+}
+
+static INLINE struct nv_instruction *
+new_instruction(struct nv_pc *pc, uint opcode)
+{
+   struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
+
+   nvc0_insn_append(pc->current_block, insn);
+   return insn;
+}
+
+static INLINE struct nv_instruction *
+new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
+{
+   struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
+
+   nvc0_insn_insert_after(at, insn);
+   return insn;
+}
+
+static INLINE struct nv_value *
+new_value(struct nv_pc *pc, ubyte file, ubyte size)
+{
+   struct nv_value *value = &pc->values[pc->num_values];
+
+   assert(pc->num_values < NV_PC_MAX_VALUES - 1);
+
+   value->n = pc->num_values++;
+   value->join = value;
+   value->reg.id = -1;
+   value->reg.file = file;
+   value->reg.size = size;
+   return value;
+}
+
+static INLINE struct nv_value *
+new_value_like(struct nv_pc *pc, struct nv_value *like)
+{
+   return new_value(pc, like->reg.file, like->reg.size);
+}
+
+/* Hand out the next nv_ref of @pc and make it reference @val, incrementing
+ * val->refc. Refs are provided in batches of 64: whenever num_refs is a
+ * multiple of 64, the pc->refs pointer array is enlarged by 64 entries, which
+ * are pointed at a freshly CALLOC'ed array of 64 refs.
+ * The insn field of the returned ref is not set here.
+ * NOTE(review): neither the REALLOC nor the CALLOC result is checked.
+ */
+static INLINE struct nv_ref *
+new_ref(struct nv_pc *pc, struct nv_value *val)
+{
+   int i;
+   struct nv_ref *ref;
+
+   if ((pc->num_refs % 64) == 0) {
+      const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
+      const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
+
+      pc->refs = REALLOC(pc->refs, old_size, new_size);
+
+      /* one allocation backs the next 64 pointer slots */
+      ref = CALLOC(64, sizeof(struct nv_ref));
+      for (i = 0; i < 64; ++i)
+         pc->refs[pc->num_refs + i] = &ref[i];
+   }
+
+   ref = pc->refs[pc->num_refs++];
+   ref->value = val;
+
+   LIST_INITHEAD(&ref->list);
+
+   ++val->refc;
+   return ref;
+}
+
+/**
+ * Allocate a basic block with CALLOC_STRUCT, give it the next block id and
+ * record it in pc->bb_list.
+ *
+ * Returns NULL if NV_PC_MAX_BASIC_BLOCKS blocks already exist or if the
+ * allocation fails; @pc is left unchanged in both cases.
+ */
+static INLINE struct nv_basic_block *
+new_basic_block(struct nv_pc *pc)
+{
+   struct nv_basic_block *bb;
+
+   if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
+      return NULL;
+
+   bb = CALLOC_STRUCT(nv_basic_block);
+   if (!bb)
+      return NULL; /* out of memory: report like the block limit */
+
+   bb->id = pc->num_blocks;
+   pc->bb_list[pc->num_blocks++] = bb;
+   return bb;
+}
+
+/* Make source slot @c of @nvi reference value @s, or clear the slot if @s is
+ * NULL. A reference already in the slot has its value's refc decremented and
+ * is unlinked from its list; the nv_ref object itself is reused for @s,
+ * otherwise a new one is obtained from new_ref(). The ref is then joined to
+ * s->last_use with LIST_ADDTAIL (if there is one) and becomes s->last_use.
+ * NOTE(review): the last_use pointer of the previously referenced value is
+ * not touched here, even if it pointed at this ref - confirm callers cope.
+ */
+static INLINE void
+nv_reference(struct nv_pc *pc,
+             struct nv_instruction *nvi, int c, struct nv_value *s)
+{
+   struct nv_ref **d = &nvi->src[c];
+   assert(c < 6);
+
+   /* drop the old reference held by this slot */
+   if (*d) {
+      --(*d)->value->refc;
+      LIST_DEL(&(*d)->list);
+   }
+
+   if (s) {
+      if (!*d) {
+         *d = new_ref(pc, s);
+         (*d)->insn = nvi;
+      } else {
+         /* reuse the existing ref; it was already unlinked above, so this
+          * LIST_DEL repeats that operation */
+         LIST_DEL(&(*d)->list);
+         (*d)->value = s;
+         ++(s->refc);
+      }
+      if (!s->last_use)
+         s->last_use = *d;
+      else
+         LIST_ADDTAIL(&s->last_use->list, &(*d)->list);
+
+      /* unconditional, so the assignment in the !last_use branch is redundant */
+      s->last_use = *d;
+      (*d)->insn = nvi;
+   } else {
+      *d = NULL;
+   }
+}
+
+/* nvc0_pc_emit.c */
+void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *);
+
+/* nvc0_pc_print.c */
+const char *nvc0_opcode_name(uint opcode);
+void nvc0_print_instruction(struct nv_instruction *);
+
+/* nvc0_pc.c */
+void nvc0_print_function(struct nv_basic_block *root);
+void nvc0_print_program(struct nv_pc *);
+
+boolean nvc0_insn_can_load(struct nv_instruction *, int s,
+                           struct nv_instruction *);
+boolean nvc0_insn_is_predicateable(struct nv_instruction *);
+
+int nvc0_insn_refcount(struct nv_instruction *);
+void nvc0_insn_delete(struct nv_instruction *);
+void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *);
+
+void nvc0_bblock_attach(struct nv_basic_block *parent,
+                        struct nv_basic_block *child, ubyte edge_kind);
+boolean nvc0_bblock_dominated_by(struct nv_basic_block *,
+                                 struct nv_basic_block *);
+boolean nvc0_bblock_reachable_by(struct nv_basic_block *future,
+                                 struct nv_basic_block *past,
+                                 struct nv_basic_block *final);
+struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *);
+
+int nvc0_pc_replace_value(struct nv_pc *pc,
+                          struct nv_value *old_val,
+                          struct nv_value *new_val);
+
+struct nv_value *nvc0_pc_find_immediate(struct nv_ref *);
+struct nv_value *nvc0_pc_find_constant(struct nv_ref *);
+
+typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
+
+void nvc0_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
+
+int nvc0_pc_exec_pass0(struct nv_pc *pc);
+int nvc0_pc_exec_pass1(struct nv_pc *pc);
+int nvc0_pc_exec_pass2(struct nv_pc *pc);
+
+int nvc0_tgsi_to_nc(struct nv_pc *, struct nvc0_translation_info *);
+
+#endif /* __NVC0_COMPILER_H__ */
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
new file mode 100644 (file)
index 0000000..8a94175
--- /dev/null
@@ -0,0 +1,943 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+#define NVC0_FIXUP_CODE_RELOC 0
+#define NVC0_FIXUP_DATA_RELOC 1
+
+struct nvc0_fixup {
+   uint8_t type;
+   int8_t shift;
+   uint32_t mask;
+   uint32_t data;
+   uint32_t ofst;
+};
+
+/* Apply all fixups recorded in prog->relocs to prog->code.
+ * For each entry the value is f->data plus @code_base or @data_base,
+ * depending on the fixup type (f->data alone for any other type). It is
+ * shifted left by f->shift, or right by -f->shift if the shift is negative,
+ * and merged into the 32-bit word at byte offset f->ofst under f->mask.
+ */
+void
+nvc0_relocate_program(struct nvc0_program *prog,
+                      uint32_t code_base,
+                      uint32_t data_base)
+{
+   struct nvc0_fixup *f = (struct nvc0_fixup *)prog->relocs;
+   unsigned i;
+
+   for (i = 0; i < prog->num_relocs; ++i) {
+      uint32_t data;
+
+      switch (f[i].type) {
+      case NVC0_FIXUP_CODE_RELOC: data = code_base + f[i].data; break;
+      case NVC0_FIXUP_DATA_RELOC: data = data_base + f[i].data; break;
+      default:
+         data = f[i].data;
+         break;
+      }
+      data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift);
+
+      /* replace only the masked bit field, keep the rest of the word */
+      prog->code[f[i].ofst / 4] &= ~f[i].mask;
+      prog->code[f[i].ofst / 4] |= data & f[i].mask;
+   }
+}
+
+/* Append a fixup entry to pc->reloc_entries.
+ * @ty is the fixup type, @w a word index added to pc->emit_pos to form the
+ * byte offset (pc->emit_pos + w) * 4, @data the value to relocate, @m the
+ * mask of the bits to patch and @s the shift, stored as int8_t (a negative
+ * shift is applied as a right shift by nvc0_relocate_program()).
+ * The entry array grows by 8 entries whenever num_relocs is a multiple of 8.
+ * NOTE(review): the REALLOC result is not checked.
+ */
+static void
+create_fixup(struct nv_pc *pc, uint8_t ty,
+             int w, uint32_t data, uint32_t m, int s)
+{
+   struct nvc0_fixup *f;
+
+   const unsigned size = sizeof(struct nvc0_fixup);
+   const unsigned n = pc->num_relocs;
+
+   if (!(n % 8))
+      pc->reloc_entries = REALLOC(pc->reloc_entries, n * size, (n + 8) * size);
+
+   f = (struct nvc0_fixup *)pc->reloc_entries;
+
+   f[n].ofst = (pc->emit_pos + w) * 4;
+   f[n].type = ty;
+   f[n].data = data;
+   f[n].mask = m;
+   f[n].shift = s;
+
+   ++pc->num_relocs;
+}
+
+static INLINE ubyte
+SSIZE(struct nv_instruction *nvi, int s)
+{
+   return nvi->src[s]->value->reg.size;
+}
+
+static INLINE ubyte
+DSIZE(struct nv_instruction *nvi, int d)
+{
+   return nvi->def[d]->reg.size;
+}
+
+static INLINE struct nv_reg *
+SREG(struct nv_ref *ref)
+{
+   if (!ref)
+      return NULL;
+   return &ref->value->join->reg;
+}
+
+static INLINE struct nv_reg *
+DREG(struct nv_value *val)
+{
+   if (!val)
+      return NULL;
+   return &val->join->reg;
+}
+
+static INLINE ubyte
+SFILE(struct nv_instruction *nvi, int s)
+{
+   return nvi->src[s]->value->reg.file;
+}
+
+/* Register file of destination operand @d of @nvi (honours @d, matching
+ * DSIZE() and SFILE(), instead of always reading def[0]).
+ */
+static INLINE ubyte
+DFILE(struct nv_instruction *nvi, int d)
+{
+   return nvi->def[d]->reg.file;
+}
+
+static INLINE void
+SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
+{
+   pc->emit[pos / 32] |= (SREG(ref) ? SREG(ref)->id : 63) << (pos % 32);
+}
+
+static INLINE void
+DID(struct nv_pc *pc, struct nv_value *val, int pos)
+{
+   pc->emit[pos / 32] |= (DREG(val) ? DREG(val)->id : 63) << (pos % 32);
+}
+
+static INLINE uint32_t
+get_immd_u32(struct nv_ref *ref) /* XXX: dependent on [0]:2 */
+{
+   assert(ref->value->reg.file == NV_FILE_IMM);
+   return ref->value->reg.imm.u32;
+}
+
+static INLINE void
+set_immd_u32_l(struct nv_pc *pc, uint32_t u32)
+{
+   pc->emit[0] |= (u32 & 0x3f) << 26;
+   pc->emit[1] |= u32 >> 6;
+}
+
+static INLINE void
+set_immd_u32(struct nv_pc *pc, uint32_t u32)
+{
+   if ((pc->emit[0] & 0xf) == 0x2) {
+      set_immd_u32_l(pc, u32);
+   } else {
+      assert(!(pc->emit[1] & 0xc000));
+      pc->emit[1] |= 0xc000;
+
+      assert(!(u32 & 0xfff));
+      set_immd_u32_l(pc, u32 >> 12);
+   }
+}
+
+static INLINE void
+set_immd(struct nv_pc *pc, struct nv_instruction *i, int s)
+{
+   set_immd_u32(pc, get_immd_u32(i->src[s]));
+}
+
+static INLINE void
+DVS(struct nv_pc *pc, struct nv_instruction *i)
+{
+   uint s = i->def[0]->reg.size;
+   int n;
+   for (n = 1; n < 4 && i->def[n]; ++n)
+      s += i->def[n]->reg.size;
+   pc->emit[0] |= ((s / 4) - 1) << 5;
+}
+
+static INLINE void
+SVS(struct nv_pc *pc, struct nv_ref *src)
+{
+   pc->emit[0] |= (SREG(src)->size / 4 - 1) << 5;
+}
+
+/* Encode the predicate of @i into word 0. Without a predicate source
+ * (i->predicate < 0) the bits 0x1c00 are set; otherwise the id of the
+ * predicate source is written via SID() at bit position 6 and 0x2000 is set
+ * when i->cc is non-zero.
+ */
+static void
+set_pred(struct nv_pc *pc, struct nv_instruction *i)
+{
+   if (i->predicate < 0) {
+      pc->emit[0] |= 0x1c00;
+      return;
+   }
+
+   SID(pc, i->src[i->predicate], 6);
+   if (i->cc)
+      pc->emit[0] |= 0x2000; /* negate */
+}
+
+static INLINE void
+set_address_16(struct nv_pc *pc, struct nv_ref *src)
+{
+   pc->emit[0] |= (src->value->reg.address & 0x003f) << 26;
+   pc->emit[1] |= (src->value->reg.address & 0xffc0) >> 6;
+}
+
+static INLINE unsigned
+const_space_index(struct nv_instruction *i, int s)
+{
+   return SFILE(i, s) - NV_FILE_MEM_C(0);
+}
+
+/**
+ * Emit a control flow instruction whose opcode byte @op goes into bits
+ * 24..31 of word 1.
+ *
+ * If @i has a target block, the block's emit_pos is encoded into both words
+ * and two code relocations are recorded so that nvc0_relocate_program() can
+ * rewrite the same bit fields: bits 26..31 of word 0 hold the low 6 bits of
+ * the address, bits 0..16 of word 1 hold the address shifted right by 6.
+ */
+static void
+emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
+{
+   pc->emit[0] = 0x000001e7;
+   pc->emit[1] = (uint32_t)op << 24; /* shift as unsigned: op may be >= 0x80 */
+
+   set_pred(pc, i);
+
+   if (i->target) {
+      uint32_t pos = i->target->emit_pos;
+
+      /* create_fixup() takes the mask first, then the shift */
+      create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 0xfc000000, 26);
+      create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, 0x0001ffff, -6);
+
+      pc->emit[0] |= (pos & 0x3f) << 26;
+      pc->emit[1] |= (pos >> 6) & 0x1ffff;
+   }
+}
+
+/* doesn't work for vfetch, export, ld, st, mov ... */
+static void
+emit_form_0(struct nv_pc *pc, struct nv_instruction *i)
+{
+   int s;
+
+   set_pred(pc, i);
+
+   DID(pc, i->def[0], 14);
+
+   for (s = 0; s < 3 && i->src[s]; ++s) {
+      if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
+          SFILE(i, s) <= NV_FILE_MEM_C(15)) {
+         assert(!(pc->emit[1] & 0xc000));
+         assert(s <= 1);
+         pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
+         set_address_16(pc, i->src[s]);
+      } else
+      if (SFILE(i, s) == NV_FILE_GPR) {
+         SID(pc, i->src[s], s ? ((s == 2) ? 49 : 26) : 20);
+      } else
+      if (SFILE(i, s) == NV_FILE_IMM) {
+         assert(!(pc->emit[1] & 0xc000));
+         assert(s == 1 || i->opcode == NV_OP_MOV);
+         set_immd(pc, i, s);
+      }
+   }
+}
+
+/* Single-source counterpart of emit_form_0: sets the predicate, writes the id
+ * of def[0] at bit 14 and encodes src[0] as a MEM_C address, a GPR id at
+ * bit 26 or an immediate.
+ * NOTE(review): the loop only ever runs with s == 0, so assert(s <= 1) is
+ * always true and assert(s == 1 || opcode == NV_OP_MOV) fails for an
+ * immediate source of any other opcode - these look copied from
+ * emit_form_0; confirm the intent.
+ */
+static void
+emit_form_1(struct nv_pc *pc, struct nv_instruction *i)
+{
+   int s;
+
+   set_pred(pc, i);
+
+   DID(pc, i->def[0], 14);
+
+   for (s = 0; s < 1 && i->src[s]; ++s) {
+      if (SFILE(i, s) >= NV_FILE_MEM_C(0) &&
+          SFILE(i, s) <= NV_FILE_MEM_C(15)) {
+         assert(!(pc->emit[1] & 0xc000));
+         assert(s <= 1);
+         pc->emit[1] |= 0x4000 | (const_space_index(i, s) << 10);
+         set_address_16(pc, i->src[s]);
+      } else
+      if (SFILE(i, s) == NV_FILE_GPR) {
+         SID(pc, i->src[s], 26);
+      } else
+      if (SFILE(i, s) == NV_FILE_IMM) {
+         assert(!(pc->emit[1] & 0xc000));
+         assert(s == 1 || i->opcode == NV_OP_MOV);
+         set_immd(pc, i, s);
+      }
+   }
+}
+
+/* Set the source modifier bits of word 0 for the first two sources:
+ * src[0] ABS -> bit 7, NEG -> bit 9; src[1] ABS -> bit 6, NEG -> bit 8.
+ */
+static void
+emit_neg_abs_1_2(struct nv_pc *pc, struct nv_instruction *i)
+{
+   const uint8_t mod0 = i->src[0]->mod;
+   const uint8_t mod1 = i->src[1]->mod;
+   uint32_t bits = 0;
+
+   if (mod0 & NV_MOD_ABS)
+      bits |= 1 << 7;
+   if (mod0 & NV_MOD_NEG)
+      bits |= 1 << 9;
+   if (mod1 & NV_MOD_ABS)
+      bits |= 1 << 6;
+   if (mod1 & NV_MOD_NEG)
+      bits |= 1 << 8;
+
+   pc->emit[0] |= bits;
+}
+
+static void
+emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+   pc->emit[1] = 0x50000000;
+
+   emit_form_0(pc, i);
+
+   emit_neg_abs_1_2(pc, i);
+
+   if (i->saturate)
+      pc->emit[1] |= 1 << 17;
+}
+
+static void
+emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+   pc->emit[1] = 0x58000000;
+
+   emit_form_0(pc, i);
+
+   if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG)
+      pc->emit[1] |= 1 << 25;
+
+   if (i->saturate)
+      pc->emit[0] |= 1 << 5;
+}
+
+static void
+emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+   pc->emit[1] = 0x30000000;
+
+   emit_form_0(pc, i);
+
+   if ((i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG)
+      pc->emit[0] |= 1 << 9;
+
+   if (i->src[2]->mod & NV_MOD_NEG)
+      pc->emit[0] |= 1 << 8;
+
+   if (i->saturate)
+      pc->emit[0] |= 1 << 5;
+}
+
+static void
+emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+   pc->emit[1] = 0x08000000;
+
+   if (NV_BASEOP(i->opcode) == NV_OP_MAX)
+      pc->emit[1] |= 0x001e0000;
+   else
+      pc->emit[1] |= 0x000e0000; /* predicate ? */
+
+   emit_form_0(pc, i);
+
+   emit_neg_abs_1_2(pc, i);
+
+   switch (i->opcode) {
+   case NV_OP_MIN_U32:
+   case NV_OP_MAX_U32:
+      pc->emit[0] |= 3;
+      break;
+   case NV_OP_MIN_S32:
+   case NV_OP_MAX_S32:
+      pc->emit[0] |= 3 | (1 << 5);
+      break;
+   case NV_OP_MIN_F32:
+   case NV_OP_MAX_F32:
+   default:
+      break;
+   }
+}
+
+static void
+emit_tex(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000086;
+   pc->emit[1] = 0x80000000;
+
+   if (i->opcode == NV_OP_TXB) pc->emit[1] |= 0x04000000;
+   else
+   if (i->opcode == NV_OP_TXL) pc->emit[1] |= 0x06000000;
+
+   set_pred(pc, i);
+
+   if (1)
+      pc->emit[0] |= 63 << 26; /* explicit derivatives */
+
+   DID(pc, i->def[0], 14);
+   SID(pc, i->src[0], 20);
+
+   pc->emit[1] |= i->tex_mask << 14;
+   pc->emit[1] |= (i->tex_argc - 1) << 20;
+
+   assert(i->ext.tex.s < 16);
+
+   pc->emit[1] |= i->ext.tex.t;
+   pc->emit[1] |= i->ext.tex.s << 8;
+
+   if (i->tex_live)
+      pc->emit[0] |= 1 << 9;
+}
+
+/* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
+static void
+emit_flop(struct nv_pc *pc, struct nv_instruction *i, ubyte op)
+{
+   pc->emit[0] = 0x00000000;
+   pc->emit[1] = 0xc8000000;
+
+   set_pred(pc, i);
+
+   DID(pc, i->def[0], 14);
+   SID(pc, i->src[0], 20);
+
+   pc->emit[0] |= op << 26;
+
+   if (op > 4) {
+      if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 9;
+      if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 7;
+   } else {
+      assert(!i->src[0]->mod);
+   }
+}
+
+/* Emit a quad operation: predicate, def[0] id at bit 14, i->lanes at bit 6 of
+ * word 0 and i->quadop in the low bits of word 1.
+ * NOTE(review): src[0] is written to both source fields (bits 20 and 26)
+ * although the assert also requires src[1] to be a GPR - confirm whether the
+ * second field is meant to take src[1].
+ */
+static void
+emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+   pc->emit[1] = 0x48000000;
+
+   set_pred(pc, i);
+
+   /* SFILE() dereferences the source, so both src[0] and src[1] must exist */
+   assert(SFILE(i, 0) == NV_FILE_GPR && SFILE(i, 1) == NV_FILE_GPR);
+
+   DID(pc, i->def[0], 14);
+   SID(pc, i->src[0], 20);
+   SID(pc, i->src[0], 26);
+
+   pc->emit[0] |= i->lanes << 6; /* l0, l1, l2, l3, dx, dy */
+   pc->emit[1] |= i->quadop;
+}
+
+static void
+emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
+{
+   i->quadop = 0x99;
+   i->lanes = 4;
+   emit_quadop(pc, i);
+}
+
+static void
+emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
+{
+   i->quadop = 0xa5;
+   i->lanes = 5;
+   emit_quadop(pc, i);
+}
+
+/* preparation op (preex2, presin / convert to fixed point) */
+static void
+emit_preop(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+   pc->emit[1] = 0x60000000;
+
+   if (i->opcode == NV_OP_PREEX2)
+      pc->emit[0] |= 0x20;
+
+   emit_form_1(pc, i);
+
+   if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 8;
+   if (i->src[0]->mod & NV_MOD_ABS) pc->emit[0] |= 1 << 6;
+}
+
+static void
+emit_shift(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000003;
+
+   switch (i->opcode) {
+   case NV_OP_SAR:
+      pc->emit[0] |= 0x20; /* fall through */
+   case NV_OP_SHR:
+      pc->emit[1] = 0x58000000;
+      break;
+   case NV_OP_SHL:
+   default:
+      pc->emit[1] = 0x60000000;
+      break;
+   }
+
+   emit_form_0(pc, i);
+}
+
+static void
+emit_bitop(struct nv_pc *pc, struct nv_instruction *i)
+{
+   if (SFILE(i, 1) == NV_FILE_IMM) {
+      pc->emit[0] = 0x00000002;
+      pc->emit[1] = 0x38000000;
+   } else {
+      pc->emit[0] = 0x00000003;
+      pc->emit[1] = 0x68000000;
+   }
+   
+   switch (i->opcode) {
+   case NV_OP_OR:
+      pc->emit[0] |= 0x40;
+      break;
+   case NV_OP_XOR:
+      pc->emit[0] |= 0x80;
+      break;
+   case NV_OP_AND:
+   default:
+      break;
+   }
+
+   emit_form_0(pc, i);
+}
+
+static void
+emit_set(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+
+   switch (i->opcode) {
+   case NV_OP_SET_S32:
+      pc->emit[0] |= 0x20; /* fall through */
+   case NV_OP_SET_U32:
+      pc->emit[0] |= 0x3;
+      pc->emit[1] = 0x100e0000;
+      break;
+   case NV_OP_SET_F32_AND:
+      pc->emit[1] = 0x18000000;
+      break;
+   case NV_OP_SET_F32_OR:
+      pc->emit[1] = 0x18200000;
+      break;
+   case NV_OP_SET_F32_XOR:
+      pc->emit[1] = 0x18400000;
+      break;
+   case NV_OP_FSET_F32:
+      pc->emit[0] |= 0x20; /* fall through */
+   case NV_OP_SET_F32:
+   default:
+      pc->emit[1] = 0x180e0000;
+      break;
+   }
+
+   if (DFILE(i, 0) == NV_FILE_PRED) {
+      pc->emit[0] |= 0x1c000;
+      pc->emit[1] += 0x08000000;
+   }
+
+   pc->emit[1] |= i->set_cond << 23;
+
+   emit_form_0(pc, i);
+
+   emit_neg_abs_1_2(pc, i); /* maybe assert that U/S32 don't use mods */
+}
+
+static void
+emit_selp(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000004;
+   pc->emit[1] = 0x20000000;
+
+   emit_form_0(pc, i);
+
+   if (i->cc || (i->src[2]->mod & NV_MOD_NOT))
+      pc->emit[1] |= 1 << 20;
+}
+
+static void
+emit_slct(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+
+   switch (i->opcode) {
+   case NV_OP_SLCT_S32:
+      pc->emit[0] |= 0x20; /* fall through */
+   case NV_OP_SLCT_U32:
+      pc->emit[0] |= 0x3;
+      pc->emit[1] = 0x30000000;
+      break;
+   case NV_OP_SLCT_F32:
+   default:
+      pc->emit[1] = 0x38000000;
+      break;
+   }
+
+   emit_form_0(pc, i);
+
+   pc->emit[1] |= i->set_cond << 23;
+}
+
+/* Emit a conversion. Besides NV_OP_CVT this also encodes the opcodes checked
+ * below (FLOOR, CEIL, TRUNC, SAT and the base ops ABS / NEG).
+ * Word 1 is selected by the destination / source type pair from ext.cvt;
+ * word 0 collects the flag bits, the log2 of the operand sizes at bits 20
+ * (destination) and 23 (source), and the operands via emit_form_1().
+ */
+static void
+emit_cvt(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000004;
+   pc->emit[1] = 0x10000000;
+
+   /* anything but CVT converts within the opcode's own type; note that this
+    * writes the type back into the instruction */
+   if (i->opcode != NV_OP_CVT)
+      i->ext.cvt.d = i->ext.cvt.s = NV_OPTYPE(i->opcode);
+
+   switch (i->ext.cvt.d) {
+   case NV_TYPE_F32:
+      switch (i->ext.cvt.s) {
+      case NV_TYPE_F32: pc->emit[1] = 0x10000000; break;
+      case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
+      case NV_TYPE_U32: pc->emit[1] = 0x18000000; break;
+      }
+      break;
+   case NV_TYPE_S32: pc->emit[0] |= 0x80; /* fall through */
+   case NV_TYPE_U32:
+      switch (i->ext.cvt.s) {
+      case NV_TYPE_F32: pc->emit[1] = 0x14000000; break;
+      case NV_TYPE_S32: pc->emit[0] |= 0x200; /* fall through */
+      case NV_TYPE_U32: pc->emit[1] = 0x1c000000; break;
+      }
+      break;
+   default:
+      assert(!"cvt: unknown type");
+      break;
+   }
+
+   /* at most one of these opcode-specific values is OR'ed into word 1 */
+   if (i->opcode == NV_OP_FLOOR)
+      pc->emit[1] |= 0x00020000;
+   else
+   if (i->opcode == NV_OP_CEIL)
+      pc->emit[1] |= 0x00040000;
+   else
+   if (i->opcode == NV_OP_TRUNC)
+      pc->emit[1] |= 0x00060000;
+
+   if (i->saturate || i->opcode == NV_OP_SAT)
+      pc->emit[0] |= 0x20;
+
+   /* ABS / NEG come either from the opcode itself or from a source modifier */
+   if (NV_BASEOP(i->opcode) == NV_OP_ABS || i->src[0]->mod & NV_MOD_ABS)
+      pc->emit[0] |= 1 << 6;
+   if (NV_BASEOP(i->opcode) == NV_OP_NEG || i->src[0]->mod & NV_MOD_NEG)
+      pc->emit[0] |= 1 << 8;
+
+   pc->emit[0] |= util_logbase2(DREG(i->def[0])->size) << 20;
+   pc->emit[0] |= util_logbase2(SREG(i->src[0])->size) << 23;
+
+   emit_form_1(pc, i);
+}
+
+/* Encode LINTERP / PINTERP.
+ *
+ * The destination goes at bit 14, the optional indirect address source at
+ * bit 20, and for PINTERP the second source at bit 26. The low 16 bits of
+ * the address of source 0 are merged into the high word. The centroid flag
+ * takes precedence over the flat flag.
+ */
+static void
+emit_interp(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000000;
+   pc->emit[1] = 0xc07e0000;
+
+   DID(pc, i->def[0], 14);
+
+   set_pred(pc, i);
+
+   /* A negative index means "no indirect source" (the other emitters test
+    * i->indirect >= 0). Testing plain truthiness would treat -1 as present
+    * and read i->src[-1].
+    */
+   if (i->indirect >= 0)
+      SID(pc, i->src[i->indirect], 20);
+   else
+      SID(pc, NULL, 20);
+
+   if (i->opcode == NV_OP_PINTERP) {
+      pc->emit[0] |= 0x040;
+      SID(pc, i->src[1], 26);
+   } else {
+      SID(pc, NULL, 26);
+   }
+
+   pc->emit[1] |= i->src[0]->value->reg.address & 0xffff;
+
+   if (i->centroid)
+      pc->emit[0] |= 0x100;
+   else
+   if (i->flat)
+      pc->emit[0] |= 0x080;
+}
+
+/* Encode VFETCH: the address of source 0 goes into the high word, the patch
+ * flag sets 0x100 in the low word, and the optional indirect source is
+ * placed at bit 26.
+ */
+static void
+emit_vfetch(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = i->patch ? 0x03f00106 : 0x03f00006;
+   pc->emit[1] = 0x06000000 | i->src[0]->value->reg.address;
+
+   set_pred(pc, i);
+
+   DVS(pc, i);
+   DID(pc, i->def[0], 14);
+
+   if (i->indirect >= 0) {
+      SID(pc, i->src[i->indirect], 26);
+   } else {
+      SID(pc, NULL, 26);
+   }
+}
+
+/* Encode EXPORT. Source 0 must be in NV_FILE_MEM_V and source 1 in
+ * NV_FILE_GPR (both asserted). The register source is placed at bit 26 and
+ * the optional indirect source at bit 20.
+ */
+static void
+emit_export(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = i->patch ? 0x00000106 : 0x00000006;
+   pc->emit[1] = 0x0a000000;
+
+   set_pred(pc, i);
+
+   assert(SFILE(i, 0) == NV_FILE_MEM_V);
+   assert(SFILE(i, 1) == NV_FILE_GPR);
+
+   /* register source first, then the size of the memory operand */
+   SID(pc, i->src[1], 26);
+   SVS(pc, i->src[0]);
+
+   pc->emit[1] |= i->src[0]->value->reg.address & 0xfff;
+
+   if (i->indirect >= 0) {
+      SID(pc, i->src[i->indirect], 20);
+   } else {
+      SID(pc, NULL, 20);
+   }
+}
+
+/* Encode a move. The opcode words depend on the file of source 0
+ * (immediate, predicate, or anything else). For NV_OP_MOV the lane mask of
+ * the instruction is forced to 0xf before encoding.
+ */
+static void
+emit_mov(struct nv_pc *pc, struct nv_instruction *i)
+{
+   uint32_t lo, hi;
+
+   if (i->opcode == NV_OP_MOV)
+      i->lanes = 0xf;
+
+   if (SFILE(i, 0) == NV_FILE_IMM) {
+      lo = 0x000001e2;
+      hi = 0x18000000;
+   } else if (SFILE(i, 0) == NV_FILE_PRED) {
+      lo = 0x1c000004;
+      hi = 0x080e0000;
+   } else {
+      /* only this form carries the lane mask */
+      lo = 0x00000004 | (i->lanes << 5);
+      hi = 0x28000000;
+   }
+
+   pc->emit[0] = lo;
+   pc->emit[1] = hi;
+
+   emit_form_1(pc, i);
+}
+
+/* Merge the access-size field of a load/store into the low opcode word.
+ * The 1- and 2-byte sizes additionally set 0x20 when NV_TYPE_ISSGD holds
+ * for i->ext.cvt.s. Unsupported sizes are reported and leave the word
+ * untouched.
+ */
+static void
+emit_ldst_size(struct nv_pc *pc, struct nv_instruction *i)
+{
+   assert(NV_IS_MEMORY_FILE(SFILE(i, 0)));
+
+   switch (SSIZE(i, 0)) {
+   case 16:
+      pc->emit[0] |= 0xc0;
+      break;
+   case 8:
+      pc->emit[0] |= 0xa0;
+      break;
+   case 4:
+      pc->emit[0] |= 0x80;
+      break;
+   case 2:
+      pc->emit[0] |= 0x40;
+      /* fall through */
+   case 1:
+      if (NV_TYPE_ISSGD(i->ext.cvt.s))
+         pc->emit[0] |= 0x20;
+      break;
+   default:
+      NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i, 0));
+      break;
+   }
+}
+
+/* Encode a load from a constant buffer: buffer index at bit 10 of the high
+ * word, then size, predicate and 16-bit address, the optional indirect
+ * source at bit 20 and the destination at bit 14.
+ */
+static void
+emit_ld_const(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x00000006;
+   pc->emit[1] = 0x14000000 | (const_space_index(i, 0) << 10);
+
+   emit_ldst_size(pc, i);
+
+   set_pred(pc, i);
+   set_address_16(pc, i->src[0]);
+
+   if (i->indirect >= 0) {
+      SID(pc, i->src[i->indirect], 20);
+   } else {
+      SID(pc, NULL, 20);
+   }
+   DID(pc, i->def[0], 14);
+}
+
+/* Encode a load. Only constant buffer sources (c0..c15) are supported; any
+ * other source file is reported and aborts the process.
+ */
+static void
+emit_ld(struct nv_pc *pc, struct nv_instruction *i)
+{
+   if (SFILE(i, 0) < NV_FILE_MEM_C(0) ||
+       SFILE(i, 0) > NV_FILE_MEM_C(15)) {
+      NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i, 0));
+      abort();
+   }
+
+   emit_ld_const(pc, i);
+}
+
+/* Stores are not implemented: report the fact and abort the process.
+ * Neither parameter is used.
+ */
+static void
+emit_st(struct nv_pc *pc, struct nv_instruction *i)
+{
+   NOUVEAU_ERR("emit_st: not handled yet\n");
+   abort();
+}
+
+/* Encode one instruction into pc->emit[0..1].
+ *
+ * Prints the instruction through debug_printf/nvc0_print_instruction, then
+ * dispatches on the opcode to the matching emit_* helper. Opcodes without a
+ * case are reported and abort the process. Finally the join flag of the
+ * instruction is merged into the low word as 0x10.
+ */
+void
+nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
+{
+   debug_printf("EMIT: "); nvc0_print_instruction(i);
+
+   switch (i->opcode) {
+   case NV_OP_VFETCH:
+      emit_vfetch(pc, i);
+      break;
+   case NV_OP_EXPORT:
+      /* nothing is written to pc->emit for an EXPORT in a fragment program */
+      if (!pc->is_fragprog)
+         emit_export(pc, i);
+      break;
+   case NV_OP_MOV:
+      emit_mov(pc, i);
+      break;
+   case NV_OP_LD:
+      emit_ld(pc, i);
+      break;
+   case NV_OP_ST:
+      emit_st(pc, i);
+      break;
+   case NV_OP_LINTERP:
+   case NV_OP_PINTERP:
+      emit_interp(pc, i);
+      break;
+   case NV_OP_ADD_F32:
+      emit_add_f32(pc, i);
+      break;
+   case NV_OP_AND:
+   case NV_OP_OR:
+   case NV_OP_XOR:
+      emit_bitop(pc, i);
+      break;
+   case NV_OP_CVT:
+   case NV_OP_ABS_F32:
+   case NV_OP_ABS_S32:
+   case NV_OP_NEG_F32:
+   case NV_OP_NEG_S32:
+   case NV_OP_SAT:
+   case NV_OP_CEIL:
+   case NV_OP_FLOOR:
+   case NV_OP_TRUNC:
+      emit_cvt(pc, i);
+      break;
+   case NV_OP_DFDX:
+      emit_ddx(pc, i);
+      break;
+   case NV_OP_DFDY:
+      emit_ddy(pc, i);
+      break;
+   /* the third argument of emit_flop selects the function */
+   case NV_OP_COS:
+      emit_flop(pc, i, 0);
+      break;
+   case NV_OP_SIN:
+      emit_flop(pc, i, 1);
+      break;
+   case NV_OP_EX2:
+      emit_flop(pc, i, 2);
+      break;
+   case NV_OP_LG2:
+      emit_flop(pc, i, 3);
+      break;
+   case NV_OP_RCP:
+      emit_flop(pc, i, 4);
+      break;
+   case NV_OP_RSQ:
+      emit_flop(pc, i, 5);
+      break;
+   case NV_OP_PRESIN:
+   case NV_OP_PREEX2:
+      emit_preop(pc, i);
+      break;
+   case NV_OP_MAD_F32:
+      emit_mad_f32(pc, i);
+      break;
+   case NV_OP_MAX_F32:
+   case NV_OP_MAX_S32:
+   case NV_OP_MAX_U32:
+   case NV_OP_MIN_F32:
+   case NV_OP_MIN_S32:
+   case NV_OP_MIN_U32:
+      emit_minmax(pc, i);
+      break;
+   case NV_OP_MUL_F32:
+      emit_mul_f32(pc, i);
+      break;
+   case NV_OP_SET_F32:
+   case NV_OP_FSET_F32:
+      emit_set(pc, i);
+      break;
+   case NV_OP_SHL:
+   case NV_OP_SHR:
+   case NV_OP_SAR:
+      emit_shift(pc, i);
+      break;
+   case NV_OP_TEX:
+   case NV_OP_TXB:
+   case NV_OP_TXL:
+      emit_tex(pc, i);
+      break;
+   /* flow control: the third argument of emit_flow differs per opcode */
+   case NV_OP_BRA:
+      emit_flow(pc, i, 0x40);
+      break;
+   case NV_OP_CALL:
+      emit_flow(pc, i, 0x50);
+      break;
+   case NV_OP_JOINAT:
+      emit_flow(pc, i, 0x60);
+      break;
+   case NV_OP_EXIT:
+      emit_flow(pc, i, 0x80);
+      break;
+   case NV_OP_RET:
+      emit_flow(pc, i, 0x90);
+      break;
+   case NV_OP_KIL:
+      emit_flow(pc, i, 0x98);
+      break;
+   case NV_OP_JOIN:
+   case NV_OP_NOP:
+      /* JOIN shares the NOP encoding; the join bit is added below */
+      pc->emit[0] = 0x00003c00;
+      pc->emit[1] = 0x00000000;
+      break;
+   case NV_OP_SELP:
+      emit_selp(pc, i);
+      break;
+   case NV_OP_SLCT_F32:
+   case NV_OP_SLCT_S32:
+   case NV_OP_SLCT_U32:
+      emit_slct(pc, i);
+      break;
+   default:
+      NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
+      abort();
+      break;
+   }
+
+   if (i->join)
+      pc->emit[0] |= 0x10;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
new file mode 100644 (file)
index 0000000..e9e3876
--- /dev/null
@@ -0,0 +1,1174 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+/* Mark the current block as visited in this pass and recurse into each of
+ * its (up to two) successors that has not been visited yet.
+ *
+ * Expects `b` (the current basic block) and `ctx` (with a `pc` member) to be
+ * in scope at the point of use; `j` is an int lvalue used as the loop
+ * counter and `f` is the function to call as f(ctx, successor).
+ */
+#define DESCEND_ARBITRARY(j, f)                                 \
+do {                                                            \
+   b->pass_seq = ctx->pc->pass_seq;                             \
+                                                                \
+   for (j = 0; j < 2; ++j)                                      \
+      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
+         f(ctx, b->out[j]);                                      \
+} while (0)
+
+/* Report whether the registers assigned to two values overlap.
+ *
+ * Values in different files, or in memory files, never interfere. Otherwise
+ * the ids of the joined values are compared: the value with the lower id
+ * interferes if its id plus its size reaches the other id.
+ *
+ * NOTE(review): two values with the same join id fall through to the final
+ * FALSE, and the range test uses >= rather than >; both look suspicious for
+ * an overlap test - confirm the intended semantics (and the unit of
+ * reg.size relative to reg.id) before relying on this. In the visible code
+ * the only caller is inside an #if 0 region.
+ */
+static INLINE boolean
+registers_interfere(struct nv_value *a, struct nv_value *b)
+{
+   if (a->reg.file != b->reg.file)
+      return FALSE;
+   if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
+      return FALSE;
+
+   /* both values must already have a register assigned */
+   assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
+
+   if (a->join->reg.id < b->join->reg.id) {
+      return (a->join->reg.id + a->reg.size >= b->join->reg.id);
+   } else
+   if (a->join->reg.id > b->join->reg.id) {
+      return (b->join->reg.id + b->reg.size >= a->join->reg.id);
+   }
+
+   return FALSE;
+}
+
+/* Two values are equal when they share file and size and, for memory files,
+ * the same address, or otherwise the same id on their joined values.
+ */
+static INLINE boolean
+values_equal(struct nv_value *a, struct nv_value *b)
+{
+   if (a->reg.file != b->reg.file)
+      return FALSE;
+   if (a->reg.size != b->reg.size)
+      return FALSE;
+
+   if (!NV_IS_MEMORY_FILE(a->reg.file))
+      return a->join->reg.id == b->join->reg.id;
+
+   return a->reg.address == b->reg.address;
+}
+
+#if 0
+/* (compiled out) Returns FALSE if any definition of @a interferes with the
+ * register of any source of @b, TRUE otherwise. Scans at most 4 definitions
+ * and 5 sources, stopping at the first NULL entry of either array.
+ */
+static INLINE boolean
+inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
+{
+   int si, di;
+
+   for (di = 0; di < 4 && a->def[di]; ++di)
+      for (si = 0; si < 5 && b->src[si]; ++si)
+         if (registers_interfere(a->def[di], b->src[si]->value))
+            return FALSE;
+
+   return TRUE;
+}
+
+/* Check whether we can swap the order of the instructions,
+ * where a & b may be either the earlier or the later one.
+ *
+ * (compiled out) The check is symmetric: it runs inst_commutation_check in
+ * both directions and requires both to succeed.
+ */
+static boolean
+inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
+{
+   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
+}
+#endif
+
+/* An instruction may be removed when it is not a store, carries none of the
+ * terminator / join / target / fixed markers, and
+ * nvc0_insn_refcount() reports zero for it.
+ */
+static INLINE boolean
+inst_removable(struct nv_instruction *nvi)
+{
+   if (nvi->opcode == NV_OP_ST)
+      return FALSE;
+
+   if (nvi->terminator || nvi->join || nvi->target || nvi->fixed)
+      return FALSE;
+
+   return !nvc0_insn_refcount(nvi);
+}
+
+/* Decide whether an instruction produces no code.
+ *
+ * UNDEF and BIND always count as no-ops; terminators and joins never do.
+ * An instruction whose first definition has no register (negative id on the
+ * joined value) is a no-op. Beyond that only MOV and SELECT can qualify:
+ * destination and source(s) must live in the same file and compare equal
+ * under values_equal().
+ */
+static INLINE boolean
+inst_is_noop(struct nv_instruction *nvi)
+{
+   struct nv_value *dst, *src0;
+
+   switch (nvi->opcode) {
+   case NV_OP_UNDEF:
+   case NV_OP_BIND:
+      return TRUE;
+   default:
+      break;
+   }
+
+   if (nvi->terminator || nvi->join)
+      return FALSE;
+
+   dst = nvi->def[0];
+   if (dst && dst->join->reg.id < 0)
+      return TRUE;
+
+   if (!(nvi->opcode == NV_OP_MOV || nvi->opcode == NV_OP_SELECT))
+      return FALSE;
+
+   src0 = nvi->src[0]->value;
+   if (dst->reg.file != src0->reg.file)
+      return FALSE;
+
+   if (src0->join->reg.id < 0) {
+      NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
+      return TRUE;
+   }
+
+   /* SELECT additionally needs its second source to match */
+   if (nvi->opcode == NV_OP_SELECT &&
+       !values_equal(dst, nvi->src[1]->value))
+      return FALSE;
+
+   return values_equal(dst, src0);
+}
+
+/* Context handed to the optimization passes in this file. */
+struct nv_pass {
+   struct nv_pc *pc; /* program being processed */
+   int n;            /* not used by the passes visible here */
+   void *priv;       /* not used by the passes visible here */
+};
+
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
+
+/* Per-block callback run (via nvc0_pc_pass_in_order) right before emission.
+ *
+ * @priv is the struct nv_pc. The block is appended to pc->bb_list, its
+ * emit_pos is derived from the last block that has a non-zero emit_size,
+ * no-op instructions (and, in fragment programs, EXPORTs) are deleted, and
+ * 8 bytes are accounted for every remaining instruction.
+ *
+ * b->emit_size is only added to, never reset here, so it is presumably zero
+ * on entry - TODO confirm against the allocator of nv_basic_block.
+ */
+static void
+nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
+{
+   struct nv_pc *pc = (struct nv_pc *)priv;
+   struct nv_basic_block *in;
+   struct nv_instruction *nvi, *next;
+   int j;
+
+   /* find the most recently listed block that has any code in it */
+   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
+
+   if (j >= 0) {
+      in = pc->bb_list[j];
+
+      /* check for no-op branches (BRA $PC+8) */
+      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
+         in->emit_size -= 8;
+         pc->emit_size -= 8;
+
+         /* the (empty) blocks listed after `in` move up by one instruction */
+         for (++j; j < pc->num_blocks; ++j)
+            pc->bb_list[j]->emit_pos -= 8;
+
+         nvc0_insn_delete(in->exit);
+      }
+      b->emit_pos = in->emit_pos + in->emit_size;
+   }
+
+   pc->bb_list[pc->num_blocks++] = b;
+
+   /* visit node */
+
+   for (nvi = b->entry; nvi; nvi = next) {
+      next = nvi->next; /* nvi may be deleted below */
+      if (inst_is_noop(nvi) ||
+          (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
+         nvc0_insn_delete(nvi);
+      } else
+         b->emit_size += 8;
+   }
+   pc->emit_size += b->emit_size;
+
+#ifdef NOUVEAU_DEBUG
+   if (!b->entry)
+      debug_printf("BB:%i is now empty\n", b->id);
+   else
+      debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
+#endif
+}
+
+/* Run the post-register-allocation passes on one (sub)routine: flatten
+ * control flow, then prepare every block for emission in order.
+ * Always returns 0.
+ */
+static int
+nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
+{
+   struct nv_pass pass;
+
+   /* Initialize every member: the struct is handed to nv_pass_flatten and
+    * must not carry indeterminate values.
+    */
+   pass.pc = pc;
+   pass.n = 0;
+   pass.priv = NULL;
+
+   pc->pass_seq++;
+   nv_pass_flatten(&pass, root);
+
+   nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
+
+   return 0;
+}
+
+/* Run nv_pc_pass2 on every non-NULL root (main program plus subroutines).
+ * pc->num_blocks is reset first because the pass rebuilds bb_list.
+ * Returns the first non-zero result of nv_pc_pass2, or 0.
+ */
+int
+nvc0_pc_exec_pass2(struct nv_pc *pc)
+{
+   int r;
+
+   NOUVEAU_DBG("preparing %u blocks for emission\n", pc->num_blocks);
+
+   pc->num_blocks = 0; /* will reorder bb_list */
+
+   for (r = 0; r < pc->num_subroutines + 1; ++r) {
+      int ret;
+
+      if (!pc->root[r])
+         continue;
+
+      ret = nv_pc_pass2(pc, pc->root[r]);
+      if (ret)
+         return ret;
+   }
+   return 0;
+}
+
+/* TRUE if @nvi is an NV_OP_LD whose first source lives in one of the
+ * constant buffer files c0..c15. A NULL instruction yields FALSE.
+ */
+static INLINE boolean
+is_cspace_load(struct nv_instruction *nvi)
+{
+   if (!nvi)
+      return FALSE;
+
+   assert(nvi->indirect != 0);
+
+   if (nvi->opcode != NV_OP_LD)
+      return FALSE;
+
+   return (nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
+           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
+}
+
+/* TRUE if @nvi is an NV_OP_MOV of a 4-byte immediate.
+ * A NULL instruction yields FALSE.
+ */
+static INLINE boolean
+is_immd32_load(struct nv_instruction *nvi)
+{
+   if (!nvi)
+      return FALSE;
+
+   if (nvi->opcode != NV_OP_MOV)
+      return FALSE;
+
+   return (nvi->src[0]->value->reg.file == NV_FILE_IMM &&
+           nvi->src[0]->value->reg.size == 4);
+}
+
+/* For commutative opcodes, move a constant buffer load from source 0 to
+ * source 1 when source 1 is not itself such a load. When the operands of an
+ * NV_OP_SET are exchanged its condition code is remapped through
+ * cc_swapped so the comparison keeps its meaning.
+ */
+static INLINE void
+check_swap_src_0_1(struct nv_instruction *nvi)
+{
+   static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+
+   struct nv_ref *first, *second;
+
+   if (!nv_op_commutative(nvi->opcode))
+      return;
+
+   first = nvi->src[0];
+   second = nvi->src[1];
+   assert(first && second && first->value && second->value);
+
+   /* only swap when exactly the first operand is a c[] load */
+   if (!is_cspace_load(first->value->insn) ||
+       is_cspace_load(second->value->insn))
+      return;
+
+   nvi->src[0] = second;
+   nvi->src[1] = first;
+
+   if (nvi->src[0] != first && nvi->opcode == NV_OP_SET)
+      nvi->set_cond = cc_swapped[nvi->set_cond];
+}
+
+/* Attach @val as the indirect address source of @nvi: it is referenced from
+ * the first free source slot (of 6), and nvi->indirect records that slot.
+ */
+static void
+nvi_set_indirect_load(struct nv_pc *pc,
+                      struct nv_instruction *nvi, struct nv_value *val)
+{
+   int slot = 0;
+
+   while (slot < 6 && nvi->src[slot])
+      ++slot;
+
+   nvi->indirect = slot;
+   assert(nvi->indirect < 6);
+
+   nv_reference(pc, nvi, nvi->indirect, val);
+}
+
+/* Fold NV_OP_LD producers directly into the instructions that use them,
+ * wherever nvc0_insn_can_load() allows it. A load that ends up without
+ * references is deleted. Recurses into unvisited successor blocks and
+ * always returns 0.
+ */
+static int
+nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *user;
+   int s;
+
+   for (user = b->entry; user; user = user->next) {
+      check_swap_src_0_1(user);
+
+      for (s = 0; s < 3 && user->src[s]; ++s) {
+         struct nv_instruction *ld = user->src[s]->value->insn;
+
+         if (ld && ld->opcode == NV_OP_LD && nvc0_insn_can_load(user, s, ld)) {
+            /* reference the memory operand instead of the load result */
+            nv_reference(ctx->pc, user, s, ld->src[0]->value);
+            if (ld->indirect >= 0)
+               nvi_set_indirect_load(ctx->pc, user,
+                                     ld->src[ld->indirect]->value);
+
+            if (!nvc0_insn_refcount(ld))
+               nvc0_insn_delete(ld);
+         }
+      }
+   }
+   DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
+
+   return 0;
+}
+
+/* Map a source modifier to the opcode that performs it: NEG or ABS for the
+ * single modifiers, MOV for none, and NOP for any other combination.
+ */
+static INLINE uint
+modifiers_opcode(uint8_t mod)
+{
+   if (mod == NV_MOD_NEG)
+      return NV_OP_NEG;
+   if (mod == NV_MOD_ABS)
+      return NV_OP_ABS;
+
+   return mod ? NV_OP_NOP : NV_OP_MOV;
+}
+
+/* Fold NEG/ABS producers into source modifiers of their single user,
+ * rewrite SUB as ADD with the second source negated, and fold SAT into a
+ * producing ADD/MUL/MAD by setting its saturate flag.
+ * Recurses into unvisited successor blocks and always returns 0.
+ *
+ * NOTE: Assumes loads have not yet been folded.
+ */
+static int
+nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *nvi, *mi, *next;
+   int j;
+   uint8_t mod;
+
+   for (nvi = b->entry; nvi; nvi = next) {
+      next = nvi->next; /* nvi may be deleted in the SAT case below */
+      if (nvi->opcode == NV_OP_SUB) {
+         nvi->src[1]->mod ^= NV_MOD_NEG;
+         nvi->opcode = NV_OP_ADD;
+      }
+
+      for (j = 0; j < 3 && nvi->src[j]; ++j) {
+         mi = nvi->src[j]->value->insn;
+         if (!mi)
+            continue;
+         /* only fold producers with a single use and no predicate */
+         if (mi->def[0]->refc > 1 || mi->predicate >= 0)
+            continue;
+
+         if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
+         else
+         if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
+         else
+            continue;
+         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
+
+         mod |= mi->src[0]->mod;
+
+         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
+            /* abs neg [abs] = abs */
+            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
+         } else
+         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
+            /* neg as opcode and modifier on same insn cannot occur */
+            /* neg neg abs = abs, neg neg = identity */
+            assert(j == 0);
+            if (mod & NV_MOD_ABS)
+               nvi->opcode = NV_OP_ABS;
+            else
+               nvi->opcode = NV_OP_MOV;
+            mod = 0;
+         }
+
+         /* leave the producer alone if the user cannot express the mods */
+         if ((nv_op_supported_src_mods(nvi->opcode) & mod) != mod)
+            continue;
+
+         nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
+
+         nvi->src[j]->mod ^= mod;
+      }
+
+      if (nvi->opcode == NV_OP_SAT) {
+         mi = nvi->src[0]->value->insn;
+
+         /* A source value need not have a defining instruction (the loop
+          * above checks for that as well), so guard against NULL here.
+          */
+         if (!mi ||
+             mi->def[0]->refc > 1 ||
+             (mi->opcode != NV_OP_ADD &&
+              mi->opcode != NV_OP_MUL &&
+              mi->opcode != NV_OP_MAD))
+            continue;
+         /* let the producer write the SAT result directly */
+         mi->saturate = 1;
+         mi->def[0] = nvi->def[0];
+         mi->def[0]->insn = mi;
+         nvc0_insn_delete(nvi);
+      }
+   }
+   DESCEND_ARBITRARY(j, nv_pass_lower_mods);
+
+   return 0;
+}
+
+/* True if value @s has a defining instruction and that one is NV_OP_MUL. */
+#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
+
+/*
+static void
+modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
+{
+   if (mod & NV_MOD_ABS) {
+      if (type == NV_TYPE_F32)
+         *val &= 0x7fffffff;
+      else
+      if ((*val) & (1 << 31))
+         *val = ~(*val) + 1;
+   }
+   if (mod & NV_MOD_NEG) {
+      if (type == NV_TYPE_F32)
+         *val ^= 0x80000000;
+      else
+         *val = ~(*val) + 1;
+   }
+}
+*/
+
+#if 0
+/* (compiled out) Fold an instruction whose first two sources are both
+ * immediates (@src0, @src1) into a MOV of the computed immediate. For MAD
+ * the product is folded and the instruction becomes an ADD with the former
+ * third source, or a plain MOV of it when the product is zero.
+ *
+ * NOTE(review): relies on modifiers_apply(), which is commented out above,
+ * and on new_value(), which is not visible in this file section.
+ */
+static void
+constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
+                    struct nv_value *src0, struct nv_value *src1)
+{
+   struct nv_value *val;
+   union {
+      float f32;
+      uint32_t u32;
+      int32_t s32;
+   } u0, u1, u;
+   ubyte type;
+
+   if (!nvi->def[0])
+      return;
+   type = NV_OPTYPE(nvi->opcode);
+
+   u.u32 = 0;
+   u0.u32 = src0->reg.imm.u32;
+   u1.u32 = src1->reg.imm.u32;
+
+   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
+   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);
+
+   switch (nvi->opcode) {
+   case NV_OP_MAD:
+      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
+         return;
+      /* fall through */
+   case NV_OP_MUL:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
+      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
+      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+   case NV_OP_ADD:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
+      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
+      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+   case NV_OP_SUB:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
+      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
+      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+   default:
+      return;
+   }
+
+   nvi->opcode = NV_OP_MOV;
+
+   val = new_value(pc, NV_FILE_IMM, type);
+
+   val->reg.imm.u32 = u.u32;
+
+   nv_reference(pc, nvi, 1, NULL);
+   nv_reference(pc, nvi, 0, val);
+
+   if (nvi->src[2]) { /* from MAD */
+      /* immediate becomes source 1, former source 2 becomes source 0 */
+      nvi->src[1] = nvi->src[0];
+      nvi->src[0] = nvi->src[2];
+      nvi->src[2] = NULL;
+      nvi->opcode = NV_OP_ADD;
+
+      if (val->reg.imm.u32 == 0) {
+         nvi->src[1] = NULL;
+         nvi->opcode = NV_OP_MOV;
+      }
+   }
+}
+
+/* (compiled out) Simplify an instruction with one immediate operand.
+ *
+ * @val is the immediate found for source @s; `t` is the other of the first
+ * two sources. Handles multiplication by 1, 2, -1, -2 and 0, addition of 0,
+ * and RCP / RSQ of an immediate.
+ *
+ * NOTE(review): relies on modifiers_apply(), which is commented out above.
+ */
+static void
+constant_operand(struct nv_pc *pc,
+                 struct nv_instruction *nvi, struct nv_value *val, int s)
+{
+   union {
+      float f32;
+      uint32_t u32;
+      int32_t s32;
+   } u;
+   int t = s ? 0 : 1;
+   uint op;
+   ubyte type;
+
+   if (!nvi->def[0])
+      return;
+   type = NV_OPTYPE(nvi->opcode);
+
+   u.u32 = val->reg.imm.u32;
+   modifiers_apply(&u.u32, type, nvi->src[s]->mod);
+
+   switch (NV_BASEOP(nvi->opcode)) {
+   case NV_OP_MUL:
+      /* x * 1: becomes MOV/NEG/ABS of x depending on its modifier */
+      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
+          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
+         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+            break;
+         nvi->opcode = op;
+         nv_reference(pc, nvi, s, NULL);
+         nvi->src[0] = nvi->src[t];
+         nvi->src[1] = NULL;
+      } else
+      /* x * 2: becomes x + x */
+      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
+          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
+         nvi->opcode = NV_OP_ADD;
+         nv_reference(pc, nvi, s, nvi->src[t]->value);
+         nvi->src[s]->mod = nvi->src[t]->mod;
+      } else
+      /* x * -1: becomes NEG x, or MOV if x was already negated */
+      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
+         if (nvi->src[t]->mod & NV_MOD_NEG)
+            nvi->opcode = NV_OP_MOV;
+         else
+            nvi->opcode = NV_OP_NEG;
+         nv_reference(pc, nvi, s, NULL);
+         nvi->src[0] = nvi->src[t];
+         nvi->src[1] = NULL;
+      } else
+      /* x * -2: becomes (-x) + (-x) */
+      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
+         nvi->opcode = NV_OP_ADD;
+         nv_reference(pc, nvi, s, nvi->src[t]->value);
+         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
+      } else
+      /* x * 0: becomes MOV of the zero immediate */
+      if (u.u32 == 0) {
+         nvi->opcode = NV_OP_MOV;
+         nv_reference(pc, nvi, t, NULL);
+         if (s) {
+            nvi->src[0] = nvi->src[1];
+            nvi->src[1] = NULL;
+         }
+      }
+      break;
+   case NV_OP_ADD:
+      /* x + 0: becomes MOV/NEG/ABS of x depending on its modifier */
+      if (u.u32 == 0) {
+         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+            break;
+         nvi->opcode = op;
+         nv_reference(pc, nvi, s, NULL);
+         nvi->src[0] = nvi->src[t];
+         nvi->src[1] = NULL;
+      }
+      break;
+   case NV_OP_RCP:
+      u.f32 = 1.0f / u.f32;
+      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+      nvi->opcode = NV_OP_MOV;
+      assert(s == 0);
+      nv_reference(pc, nvi, 0, val);
+      break;
+   case NV_OP_RSQ:
+      u.f32 = 1.0f / sqrtf(u.f32);
+      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+      nvi->opcode = NV_OP_MOV;
+      assert(s == 0);
+      nv_reference(pc, nvi, 0, val);
+      break;
+   default:
+      break;
+   }
+}
+#endif
+
+/* Arithmetic lowering pass: constant folding and MUL+ADD -> MAD fusion.
+ *
+ * The whole body is currently compiled out, so the function does nothing
+ * and returns 0.
+ *
+ * NOTE(review): the disabled body calls nv_reference() with three arguments
+ * while the live code in this file passes four, and it uses new_ref() and
+ * nvcg_find_immediate(), neither of which is visible here; it would need
+ * porting before being re-enabled.
+ */
+static int
+nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+#if 0
+   struct nv_instruction *nvi, *next;
+   int j;
+
+   for (nvi = b->entry; nvi; nvi = next) {
+      struct nv_value *src0, *src1, *src;
+      int mod;
+
+      next = nvi->next;
+
+      src0 = nvcg_find_immediate(nvi->src[0]);
+      src1 = nvcg_find_immediate(nvi->src[1]);
+
+      if (src0 && src1)
+         constant_expression(ctx->pc, nvi, src0, src1);
+      else {
+         if (src0)
+            constant_operand(ctx->pc, nvi, src0, 0);
+         else
+         if (src1)
+            constant_operand(ctx->pc, nvi, src1, 1);
+      }
+
+      /* try to combine MUL, ADD into MAD */
+      if (nvi->opcode != NV_OP_ADD)
+         continue;
+
+      src0 = nvi->src[0]->value;
+      src1 = nvi->src[1]->value;
+
+      if (SRC_IS_MUL(src0) && src0->refc == 1)
+         src = src0;
+      else
+      if (SRC_IS_MUL(src1) && src1->refc == 1)
+         src = src1;
+      else
+         continue;
+
+      /* could have an immediate from above constant_*  */
+      if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
+         continue;
+
+      nvi->opcode = NV_OP_MAD;
+      mod = nvi->src[(src == src0) ? 0 : 1]->mod;
+      nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
+      nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];
+
+      assert(!(mod & ~NV_MOD_NEG));
+      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
+      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
+      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
+      nvi->src[1]->mod = src->insn->src[1]->mod;
+   }
+   DESCEND_ARBITRARY(j, nv_pass_lower_arith);
+#endif
+   return 0;
+}
+
+/* TODO: redundant store elimination */
+
+/* One remembered memory access, kept in a singly linked list per file. */
+struct mem_record {
+   struct mem_record *next;     /* next record of the same list */
+   struct nv_instruction *insn; /* instruction that performs the access */
+   uint32_t ofst;               /* reg.address of the memory operand */
+   uint32_t base;               /* `n` of the indirect source value, or 0 */
+   uint32_t size;               /* reg.size of the memory operand */
+};
+
+/* Number of mem_record entries available to one pass context. */
+#define MEM_RECORD_POOL_SIZE 1024
+
+/* Context of the memory access passes: one record list per memory file,
+ * backed by a fixed pool from which records are handed out sequentially.
+ */
+struct pass_reld_elim {
+   struct nv_pc *pc;
+
+   struct mem_record *imm;       /* not used by the live code visible here */
+   struct mem_record *mem_v;     /* EXPORTs to NV_FILE_MEM_V */
+   struct mem_record *mem_a;     /* VFETCH instructions */
+   struct mem_record *mem_c[16]; /* loads from c0..c15, indexed by buffer */
+   struct mem_record *mem_l;     /* NV_OP_ST instructions */
+
+   struct mem_record pool[MEM_RECORD_POOL_SIZE];
+   int alloc;                    /* number of pool entries handed out */
+};
+
+/* Merge load @ld into the (vector) load remembered in @rec.
+ *
+ * The definitions of @ld are moved to rec->insn so that its def[] array
+ * stays ordered by ascending address, the record and the memory operand of
+ * rec->insn grow by the size of @ld, and @ld is deleted. Nothing happens
+ * if the combined access would be misaligned (4-byte alignment for a
+ * combined size of 8, 8-byte alignment for anything larger).
+ */
+static void
+combine_load(struct mem_record *rec, struct nv_instruction *ld)
+{
+   struct nv_instruction *fv = rec->insn;
+   struct nv_value *mem = ld->src[0]->value;
+   uint32_t size = rec->size + mem->reg.size;
+   int j;
+   int d = rec->size / 4;     /* 32-bit defs already held by the record */
+   int n = mem->reg.size / 4; /* 32-bit defs contributed by ld */
+
+   assert(rec->size < 16);
+   if (rec->ofst > mem->reg.address) {
+      /* ld sits below the record: its defs have to come first */
+      if ((size == 8 && mem->reg.address & 3) ||
+          (size > 8 && mem->reg.address & 7))
+         return;
+      rec->ofst = mem->reg.address;
+      /* Make room for exactly n new defs at the front. Shifting by the
+       * record's own count is only right when both sizes are equal; move
+       * from the top so overlapping slots are not clobbered.
+       */
+      for (j = d - 1; j >= 0; --j)
+         fv->def[n + j] = fv->def[j];
+      d = 0;
+   } else
+   if ((size == 8 && rec->ofst & 3) ||
+       (size > 8 && rec->ofst & 7)) {
+      return;
+   }
+
+   /* hand the defs of ld over to the combined instruction */
+   for (j = 0; j < n; ++j) {
+      fv->def[d] = ld->def[j];
+      fv->def[d++]->insn = fv;
+   }
+
+   fv->src[0]->value->reg.size = rec->size = size;
+
+   nvc0_insn_delete(ld);
+}
+
+/* Placeholder for merging adjacent EXPORTs: not implemented, does nothing. */
+static void
+combine_export(struct mem_record *rec, struct nv_instruction *ex)
+{
+
+}
+
+/* Take the next entry from the pool of @ctx, fill it in for @nvi and push
+ * it onto the front of list *@rec. The size is taken from the first source
+ * of @nvi. The pool index is not range checked here.
+ */
+static INLINE void
+add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
+               uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
+{
+   struct mem_record *entry = &ctx->pool[ctx->alloc];
+
+   ctx->alloc++;
+
+   entry->insn = nvi;
+   entry->size = nvi->src[0]->value->reg.size;
+   entry->base = base;
+   entry->ofst = ofst;
+
+   /* link in at the head of the list */
+   entry->next = *rec;
+   *rec = entry;
+}
+
+/* vectorize and reuse loads from memory or of immediates
+ *
+ * Walks the block and, for constant buffer loads, VFETCHes and EXPORTs to
+ * NV_FILE_MEM_V, looks for an earlier recorded access that is directly
+ * adjacent within the same 16-byte chunk and has the same base. A match is
+ * combined with the current instruction; otherwise the instruction is
+ * recorded (as long as the record pool has room). Recurses into unvisited
+ * successor blocks and always returns 0.
+ */
+static int
+nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+   struct mem_record **rec, *it;
+   struct nv_instruction *ld, *next;
+   struct nv_value *mem;
+   uint32_t base, ofst;
+   int s;
+
+   for (ld = b->entry; ld; ld = next) {
+      next = ld->next; /* ld may be deleted by combine_load */
+
+      /* select the record list matching the kind of access */
+      if (is_cspace_load(ld)) {
+         mem = ld->src[0]->value;
+         rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
+      } else
+      if (ld->opcode == NV_OP_VFETCH) {
+         mem = ld->src[0]->value;
+         rec = &ctx->mem_a;
+      } else
+      if (ld->opcode == NV_OP_EXPORT) {
+         mem = ld->src[0]->value;
+         if (mem->reg.file != NV_FILE_MEM_V)
+            continue;
+         rec = &ctx->mem_v;
+      } else {
+         continue;
+      }
+      /* skip loads whose result is never referenced */
+      if (ld->def[0] && ld->def[0]->refc == 0)
+         continue;
+      ofst = mem->reg.address;
+      base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
+
+      for (it = *rec; it; it = it->next) {
+         /* same base, same 16-byte chunk, and directly adjacent on either
+          * side of the recorded access
+          */
+         if (it->base == base &&
+             ((it->ofst >> 4) == (ofst >> 4)) &&
+             ((it->ofst + it->size == ofst) ||
+              (it->ofst - mem->reg.size == ofst))) {
+            /* NV_OP_LD is never combined to a total of 12 bytes */
+            if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
+               continue;
+            /* reject pairs whose lower access starts at offset 4 of the
+             * chunk
+             */
+            if (it->ofst < ofst) {
+               if ((it->ofst & 0xf) == 4)
+                  continue;
+            } else
+            if ((ofst & 0xf) == 4)
+               continue;
+            break;
+         }
+      }
+      if (it) {
+         switch (ld->opcode) {
+         case NV_OP_EXPORT: combine_export(it, ld); break;
+         default:
+            combine_load(it, ld);
+            break;
+         }
+      } else
+      if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
+         add_mem_record(ctx, rec, base, ofst, ld);
+      }
+   }
+
+   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
+   return 0;
+}
+
+/* Placeholder for removing a redundant store: not implemented, does
+ * nothing.
+ */
+static void
+eliminate_store(struct mem_record *rec, struct nv_instruction *st)
+{
+}
+
+/* elimination of redundant stores
+ *
+ * Records NV_OP_ST instructions and EXPORTs to NV_FILE_MEM_V. When a later
+ * store hits a location already recorded with the same base,
+ * eliminate_store() is invoked (currently a stub); otherwise the store is
+ * recorded as long as the record pool has room. Every other instruction is
+ * skipped. Recurses into unvisited successor blocks and always returns 0.
+ */
+static int
+pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+   struct mem_record **rec, *it;
+   struct nv_instruction *st, *next;
+   struct nv_value *mem;
+   uint32_t base, ofst, size;
+   int s;
+
+   for (st = b->entry; st; st = next) {
+      next = st->next;
+
+      if (st->opcode == NV_OP_ST) {
+         mem = st->src[0]->value;
+         rec = &ctx->mem_l;
+      } else
+      if (st->opcode == NV_OP_EXPORT) {
+         mem = st->src[0]->value;
+         if (mem->reg.file != NV_FILE_MEM_V)
+            continue;
+         rec = &ctx->mem_v;
+      } else {
+         /* Not a store: mem and rec are not set, so the instruction must
+          * not reach the code below.
+          * TODO: purge records invalidated by this instruction
+          */
+         continue;
+      }
+      ofst = mem->reg.address;
+      base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
+      size = mem->reg.size;
+
+      /* NOTE(review): the range test uses the size of the new store rather
+       * than it->size - confirm which one is intended once
+       * eliminate_store() is implemented.
+       */
+      for (it = *rec; it; it = it->next) {
+         if (it->base == base &&
+             (it->ofst <= ofst && (it->ofst + size) > ofst))
+            break;
+      }
+      if (it)
+         eliminate_store(it, st);
+      else
+      if (ctx->alloc < MEM_RECORD_POOL_SIZE) /* never overrun the pool */
+         add_mem_record(ctx, rec, base, ofst, st);
+   }
+
+   /* continue with this pass, not with nv_pass_mem_opt */
+   DESCEND_ARBITRARY(s, pass_store_elim);
+   return 0;
+}
+
+/* TODO: properly handle loads from l[] memory in the presence of stores
+ *
+ * Reload elimination pass. The whole body is currently compiled out, so
+ * the function does nothing and returns 0.
+ *
+ * NOTE(review): the disabled body refers to names that do not match the
+ * definitions visible in this file (struct load_record,
+ * LOAD_RECORD_POOL_SIZE, ctx->mem_s, it->data, it->value, NV_OP_LDA); it
+ * would need porting to struct mem_record / struct pass_reld_elim before
+ * being re-enabled.
+ */
+static int
+nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+#if 0
+   struct load_record **rec, *it;
+   struct nv_instruction *ld, *next;
+   uint64_t data[2];
+   struct nv_value *val;
+   int j;
+
+   for (ld = b->entry; ld; ld = next) {
+      next = ld->next;
+      if (!ld->src[0])
+         continue;
+      val = ld->src[0]->value;
+      rec = NULL;
+
+      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
+         data[0] = val->reg.id;
+         data[1] = 0;
+         rec = &ctx->mem_v;
+      } else
+      if (ld->opcode == NV_OP_LDA) {
+         data[0] = val->reg.id;
+         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
+         if (val->reg.file >= NV_FILE_MEM_C(0) &&
+             val->reg.file <= NV_FILE_MEM_C(15))
+            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
+         else
+         if (val->reg.file == NV_FILE_MEM_S)
+            rec = &ctx->mem_s;
+         else
+         if (val->reg.file == NV_FILE_MEM_L)
+            rec = &ctx->mem_l;
+      } else
+      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
+         data[0] = val->reg.imm.u32;
+         data[1] = 0;
+         rec = &ctx->imm;
+      }
+
+      if (!rec || !ld->def[0]->refc)
+         continue;
+
+      for (it = *rec; it; it = it->next)
+         if (it->data[0] == data[0] && it->data[1] == data[1])
+            break;
+
+      if (it) {
+         if (ld->def[0]->reg.id >= 0)
+            it->value = ld->def[0];
+         else
+         if (!ld->fixed)
+            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
+      } else {
+         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
+            continue;
+         it = &ctx->pool[ctx->alloc++];
+         it->next = *rec;
+         it->data[0] = data[0];
+         it->data[1] = data[1];
+         it->value = ld->def[0];
+         *rec = it;
+      }
+   }
+
+   ctx->imm = NULL;
+   ctx->mem_s = NULL;
+   ctx->mem_v = NULL;
+   for (j = 0; j < 16; ++j)
+      ctx->mem_c[j] = NULL;
+   ctx->mem_l = NULL;
+   ctx->alloc = 0;
+
+   DESCEND_ARBITRARY(j, nv_pass_reload_elim);
+#endif
+   return 0;
+}
+
+/* For every texture instruction of the program, compute tex_mask (bit c is
+ * set if result component c is referenced) and reorder def[] so that the
+ * referenced results come first, followed by the unreferenced ones, each
+ * group keeping its original relative order.
+ *
+ * Walks the whole instruction array of ctx->pc; @b is not used.
+ * Always returns 0.
+ */
+static int
+nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   int n;
+
+   for (n = 0; n < ctx->pc->num_instructions; ++n) {
+      struct nv_instruction *tex = &ctx->pc->instructions[n];
+      struct nv_value *saved[4];
+      int comp;
+      int slot = 0;
+
+      if (!nv_is_texture_op(tex->opcode))
+         continue;
+
+      /* remember the original results and collect the mask */
+      tex->tex_mask = 0;
+      for (comp = 0; comp < 4; ++comp) {
+         saved[comp] = tex->def[comp];
+         if (saved[comp]->refc)
+            tex->tex_mask |= 1 << comp;
+      }
+
+      /* referenced components first ... */
+      for (comp = 0; comp < 4; ++comp) {
+         if (tex->tex_mask & (1 << comp))
+            tex->def[slot++] = saved[comp];
+      }
+      /* ... then the unreferenced rest */
+      for (comp = 0; comp < 4; ++comp) {
+         if (!(tex->tex_mask & (1 << comp)))
+            tex->def[slot++] = saved[comp];
+      }
+      assert(slot == 4);
+   }
+   return 0;
+}
+
+/* Context of the dead code elimination pass. */
+struct nv_pass_dce {
+   struct nv_pc *pc;
+   uint removed; /* number of instructions deleted during the current run */
+};
+
+/* Dead code elimination: delete every instruction of @b (phis included) for
+ * which inst_removable() is true, counting the deletions in ctx->removed,
+ * then continue with the successor blocks. Always returns 0.
+ */
+static int
+nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *insn = b->phi ? b->phi : b->entry;
+   int edge;
+
+   while (insn) {
+      /* fetch the successor first, insn may be deleted below */
+      struct nv_instruction *following = insn->next;
+
+      if (inst_removable(insn)) {
+         nvc0_insn_delete(insn);
+         ctx->removed++;
+      }
+      insn = following;
+   }
+
+   DESCEND_ARBITRARY(edge, nv_pass_dce);
+
+   return 0;
+}
+
+#if 0
+/* NOTE: this whole region is compiled out; nothing in it is built. */
+
+/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
+ * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
+ * BREAK and dummy ELSE block.
+ */
+static INLINE boolean
+bb_is_if_else_endif(struct nv_basic_block *bb)
+{
+   /* need two outgoing edges to be a conditional at all */
+   if (!bb->out[0] || !bb->out[1])
+      return FALSE;
+
+   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
+      /* first branch leaves a loop: compare its second successor instead */
+      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
+              !bb->out[1]->out[1]);
+   } else {
+      /* both branches must have the same single successor */
+      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
+              !bb->out[0]->out[1] &&
+              !bb->out[1]->out[1]);
+   }
+}
+
+/* predicate instructions and remove branch at the end */
+/* (empty placeholder: the body has not been written) */
+static void
+predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
+                       struct nv_value *p, ubyte cc)
+{
+
+}
+#endif
+
+/* Control flow flattening pass.
+ *
+ * NOTE: Run this after register allocation, we can just cut out the cflow
+ * instructions and hook the predicates to the conditional OPs if they are
+ * not using immediates; better than inserting SELECT to join definitions.
+ *
+ * NOTE: Should adapt prior optimization to make this possible more often.
+ *
+ * Currently an empty placeholder: nothing is flattened, @ctx and @b are not
+ * used and 0 is returned.
+ */
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   return 0;
+}
+
+/* local common subexpression elimination, stupid O(n^2) implementation
+ *
+ * The body is currently compiled out with #if 0, so this pass does nothing
+ * and always returns 0.
+ *
+ * NOTE(review): the disabled code deletes ir before it reads ir->def[0] and
+ * calls nvcg_replace_value(); both should be checked before re-enabling it.
+ */
+static int
+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+#if 0
+   struct nv_instruction *ir, *ik, *next;
+   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
+   int s;
+   unsigned int reps;
+
+   /* repeat until a full sweep over the block replaces nothing */
+   do {
+      reps = 0;
+      for (ir = entry; ir; ir = next) {
+         next = ir->next;
+         /* compare ir against every earlier instruction ik of the block */
+         for (ik = entry; ik != ir; ik = ik->next) {
+            if (ir->opcode != ik->opcode || ir->fixed)
+               continue;
+
+            if (!ir->def[0] || !ik->def[0] ||
+                ik->opcode == NV_OP_LDA ||
+                ik->opcode == NV_OP_STA ||
+                ik->opcode == NV_OP_MOV ||
+                nv_is_vector_op(ik->opcode))
+               continue; /* ignore loads, stores & moves */
+
+            if (ik->src[4] || ir->src[4])
+               continue; /* don't mess with address registers */
+
+            if (ik->flags_src || ir->flags_src ||
+                ik->flags_def || ir->flags_def)
+               continue; /* and also not with flags, for now */
+
+            if (ik->def[0]->reg.file == NV_FILE_OUT ||
+                ir->def[0]->reg.file == NV_FILE_OUT ||
+                !values_equal(ik->def[0], ir->def[0]))
+               continue;
+
+            /* all three sources must match in modifier and value */
+            for (s = 0; s < 3; ++s) {
+               struct nv_value *a, *b;
+
+               if (!ik->src[s]) {
+                  if (ir->src[s])
+                     break;
+                  continue;
+               }
+               if (ik->src[s]->mod != ir->src[s]->mod)
+                  break;
+               a = ik->src[s]->value;
+               b = ir->src[s]->value;
+               if (a == b)
+                  continue;
+               if (a->reg.file != b->reg.file ||
+                   a->reg.id < 0 ||
+                   a->reg.id != b->reg.id)
+                  break;
+            }
+            if (s == 3) {
+               /* identical computation: drop ir and use ik's result */
+               nvc0_insn_delete(ir);
+               ++reps;
+               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
+               break;
+            }
+         }
+      }
+   } while(reps);
+
+   DESCEND_ARBITRARY(s, nv_pass_cse);
+#endif
+   return 0;
+}
+
+/* Run the optimization passes of this file on the control flow graph
+ * rooted at @root, in this order: arithmetic lowering, modifier lowering,
+ * load folding, (optional) reload elimination, CSE, DCE until nothing more
+ * is removed, (optional) two rounds of memory optimization, and finally the
+ * texture mask computation.
+ *
+ * pc->pass_seq is incremented before each graph-walking pass.
+ *
+ * Returns 0 on success, or the first non-zero result reported by a pass.
+ * The reload elimination context is released on every exit path.
+ */
+static int
+nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
+{
+   struct pass_reld_elim *reldelim = NULL;
+   struct nv_pass pass;
+   struct nv_pass_dce dce;
+   int ret;
+
+   pass.n = 0;
+   pass.pc = pc;
+
+   /* Do this first, so we don't have to pay attention
+    * to whether sources are supported memory loads.
+    */
+   pc->pass_seq++;
+   ret = nv_pass_lower_arith(&pass, root);
+   if (ret)
+      goto out;
+
+   pc->pass_seq++;
+   ret = nv_pass_lower_mods(&pass, root);
+   if (ret)
+      goto out;
+
+   pc->pass_seq++;
+   ret = nvc0_pass_fold_loads(&pass, root);
+   if (ret)
+      goto out;
+
+   if (pc->opt_reload_elim) {
+      reldelim = CALLOC_STRUCT(pass_reld_elim);
+      if (!reldelim) {
+         /* out of memory; any non-zero value is propagated as failure */
+         ret = -1;
+         goto out;
+      }
+      reldelim->pc = pc;
+
+      pc->pass_seq++;
+      ret = nv_pass_reload_elim(reldelim, root);
+      if (ret)
+         goto out;
+
+      /* reset the context for its later use by nv_pass_mem_opt */
+      memset(reldelim, 0, sizeof(struct pass_reld_elim));
+      reldelim->pc = pc;
+   }
+
+   pc->pass_seq++;
+   ret = nv_pass_cse(&pass, root);
+   if (ret)
+      goto out;
+
+   /* iterate DCE to a fixed point */
+   dce.pc = pc;
+   do {
+      dce.removed = 0;
+      pc->pass_seq++;
+      ret = nv_pass_dce(&dce, root);
+      if (ret)
+         goto out;
+   } while (dce.removed);
+
+   if (pc->opt_reload_elim) {
+      pc->pass_seq++;
+      ret = nv_pass_mem_opt(reldelim, root);
+      if (!ret) {
+         /* second round with a fresh context */
+         memset(reldelim, 0, sizeof(struct pass_reld_elim));
+         reldelim->pc = pc;
+
+         pc->pass_seq++;
+         ret = nv_pass_mem_opt(reldelim, root);
+      }
+      if (ret)
+         goto out;
+   }
+
+   ret = nv_pass_tex_mask(&pass, root);
+
+out:
+   if (reldelim)
+      FREE(reldelim);
+   return ret;
+}
+
+/* Run nv_pc_pass0() on the root block of the main program and of every
+ * subroutine (pc->root[0 .. num_subroutines]), skipping empty slots.
+ *
+ * Returns 0 if all of them succeed, otherwise the first non-zero result.
+ */
+int
+nvc0_pc_exec_pass0(struct nv_pc *pc)
+{
+   int sub;
+
+   for (sub = 0; sub < pc->num_subroutines + 1; ++sub) {
+      int ret;
+
+      if (!pc->root[sub])
+         continue;
+
+      ret = nv_pc_pass0(pc, pc->root[sub]);
+      if (ret)
+         return ret;
+   }
+   return 0;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
new file mode 100644 (file)
index 0000000..9eac5ad
--- /dev/null
@@ -0,0 +1,375 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_pc.h"
+
+/* Debug output helper; forwards all arguments to debug_printf().
+ * Uses the standard C99 variadic macro form rather than the GNU-only
+ * named-argument extension.
+ */
+#define PRINT(...) debug_printf(__VA_ARGS__)
+
+/* Number of elements of a true array (not valid for pointers). */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
+/* ANSI SGR escape sequences used to colorize the debug output. */
+static const char *norm = "\x1b[00m"; /* reset attributes */
+static const char *gree = "\x1b[32m"; /* green foreground */
+static const char *blue = "\x1b[34m"; /* blue foreground */
+static const char *cyan = "\x1b[36m"; /* cyan foreground */
+static const char *yllw = "\x1b[33m"; /* yellow foreground */
+static const char *mgta = "\x1b[35m"; /* magenta foreground */
+
+/* Printable names of condition codes, indexed by the code (20 entries).
+ * nv_cond_name() clamps its argument to the last index, 19.
+ */
+static const char *nv_cond_names[] =
+{
+   "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "",
+   "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
+   "o", "c", "a", "s"
+};
+
+/* Printable names of source modifier combinations, indexed by the modifier
+ * value; the last entry (index 9) is the fallback for anything unknown.
+ *
+ * Every entry must be followed by a comma: adjacent string literals are
+ * concatenated by the compiler, which silently shortens the table and
+ * shifts all following entries.
+ */
+static const char *nv_modifier_strings[] =
+{
+   "",
+   "neg",
+   "abs",
+   "neg abs",
+   "not",
+   "not neg",
+   "not abs",
+   "not neg abs",
+   "sat",
+   "BAD_MOD"
+};
+
+/* Return the printable name of @opcode as stored in nvc0_op_info_table.
+ * Opcodes beyond NV_OP_COUNT are mapped to the entry at index NV_OP_COUNT.
+ */
+const char *
+nvc0_opcode_name(uint opcode)
+{
+   uint idx = opcode;
+
+   if (!(idx < NV_OP_COUNT))
+      idx = NV_OP_COUNT;
+
+   return nvc0_op_info_table[idx].name;
+}
+
+/* Return a short printable name for data type @type.
+ * For NV_TYPE_ANY the name is derived from @size (in bytes) instead, e.g.
+ * "b32" for size 4. Unknown types and sizes yield "BAD_TYPE" / "BAD_SIZE".
+ */
+static INLINE const char *
+nv_type_name(ubyte type, ubyte size)
+{
+   if (type == NV_TYPE_ANY) {
+      /* untyped: describe by width only */
+      switch (size) {
+      case 1:  return "b8";
+      case 2:  return "b16";
+      case 4:  return "b32";
+      case 8:  return "b64";
+      case 12: return "b96";
+      case 16: return "b128";
+      default: return "BAD_SIZE";
+      }
+   }
+
+   switch (type) {
+   case NV_TYPE_U16: return "u16";
+   case NV_TYPE_S16: return "s16";
+   case NV_TYPE_F32: return "f32";
+   case NV_TYPE_U32: return "u32";
+   case NV_TYPE_S32: return "s32";
+   case NV_TYPE_P32: return "p32";
+   case NV_TYPE_F64: return "f64";
+   default:
+      return "BAD_TYPE";
+   }
+}
+
+/* Return the printable name of condition code @cc; codes beyond the table
+ * are mapped to its last entry.
+ */
+static INLINE const char *
+nv_cond_name(ubyte cc)
+{
+   const unsigned last = ARRAY_SIZE(nv_cond_names) - 1;
+
+   if (cc < last)
+      return nv_cond_names[cc];
+   return nv_cond_names[last];
+}
+
+/* Return the printable name of source modifier value @mod; values beyond
+ * the table are mapped to its last entry (the "BAD_MOD" fallback).
+ */
+static INLINE const char *
+nv_modifier_string(ubyte mod)
+{
+   /* Clamp against the real table size rather than a hard-coded index, so
+    * the lookup can never read past the end of nv_modifier_strings.
+    */
+   const unsigned last = ARRAY_SIZE(nv_modifier_strings) - 1;
+
+   return nv_modifier_strings[MIN2(mod, last)];
+}
+
+/* Number to print for @value: the register id of its join representative
+ * if that is non-negative, otherwise the value's own number n.
+ */
+static INLINE int
+nv_value_id(struct nv_value *value)
+{
+   if (value->join->reg.id < 0)
+      return value->n;
+
+   return value->join->reg.id;
+}
+
+/* TRUE if @value has a non-negative register id. */
+static INLINE boolean
+nv_value_allocated(struct nv_value *value)
+{
+   if (value->reg.id < 0)
+      return FALSE;
+
+   return TRUE;
+}
+
+/* Print a memory operand as " <c>[<buf>][<addr reg>(+|-)0x<offset>]".
+ *
+ * @c:      letter naming the memory file
+ * @buf:    buffer index printed after @c, omitted if negative
+ * @a:      optional indirect address value, may be NULL
+ * @offset: byte offset; printed as sign and magnitude when @a is given
+ */
+static INLINE void
+nv_print_address(const char c, int buf, struct nv_value *a, int offset)
+{
+   /* '$' marks an allocated address register, '%' a virtual one */
+   const char reg_pfx = (a && nv_value_allocated(a)) ? '$' : '%';
+   const char sign = (offset < 0) ? '-' : '+';
+
+   if (offset < 0)
+      offset = -offset;
+
+   if (buf < 0)
+      PRINT(" %s%c[", cyan, c);
+   else
+      PRINT(" %s%c%i[", cyan, c, buf);
+
+   if (a)
+      PRINT("%s%ca%i%s%c", mgta, reg_pfx, nv_value_id(a), cyan, sign);
+
+   PRINT("%s0x%x%s]", yllw, offset, cyan);
+}
+
+/* Print one operand.
+ *
+ * @value: the value to print
+ * @indir: indirect address value for memory operands, or NULL
+ * @type:  data type used for the type prefix and for formatting immediates
+ *
+ * Everything except predicates is prefixed with its type name. Registers
+ * are printed with '$' if the join representative has a register assigned
+ * and with '%' otherwise.
+ */
+static INLINE void
+nv_print_value(struct nv_value *value, struct nv_value *indir, ubyte type)
+{
+   char reg_pfx = nv_value_allocated(value->join) ? '$' : '%';
+
+   if (value->reg.file != NV_FILE_PRED)
+      PRINT(" %s%s", gree, nv_type_name(type, value->reg.size));
+
+   switch (value->reg.file) {
+   case NV_FILE_GPR:
+      PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value));
+      /* suffix for 8 and 16 byte wide registers */
+      if (value->reg.size == 8)
+         PRINT("d");
+      if (value->reg.size == 16)
+         PRINT("q");
+      break;
+   case NV_FILE_PRED:
+      PRINT(" %s%cp%i", mgta, reg_pfx, nv_value_id(value));
+      break;
+   case NV_FILE_COND:
+      PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value));
+      break;
+   case NV_FILE_MEM_L:
+      nv_print_address('l', -1, indir, value->reg.address);
+      break;
+   case NV_FILE_MEM_G:
+      nv_print_address('g', -1, indir, value->reg.address);
+      break;
+   case NV_FILE_MEM_A:
+      nv_print_address('a', -1, indir, value->reg.address);
+      break;
+   case NV_FILE_MEM_V:
+      nv_print_address('v', -1, indir, value->reg.address);
+      break;
+   case NV_FILE_IMM:
+      /* immediates: formatting depends on the requested type; types not
+       * listed here print nothing
+       */
+      switch (type) {
+      case NV_TYPE_U16:
+      case NV_TYPE_S16:
+         PRINT(" %s0x%04x", yllw, value->reg.imm.u32);
+         break;
+      case NV_TYPE_F32:
+         PRINT(" %s%f", yllw, value->reg.imm.f32);
+         break;
+      case NV_TYPE_F64:
+         PRINT(" %s%f", yllw, value->reg.imm.f64);
+         break;
+      case NV_TYPE_U32:
+      case NV_TYPE_S32:
+      case NV_TYPE_P32:
+      case NV_TYPE_ANY:
+         PRINT(" %s0x%08x", yllw, value->reg.imm.u32);
+         break;
+      }
+      break;
+   default:
+      /* c[0] .. c[15] are a contiguous range of file numbers */
+      if (value->reg.file >= NV_FILE_MEM_C(0) &&
+          value->reg.file <= NV_FILE_MEM_C(15))
+         nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), indir,
+                          value->reg.address);
+      else
+         NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
+      break;
+   }
+}
+
+/* Print the value a source reference points to; see nv_print_value(). */
+static INLINE void
+nv_print_ref(struct nv_ref *ref, struct nv_value *indir, ubyte type)
+{
+   nv_print_value(ref->value, indir, type);
+}
+
+/* Print instruction @i on one line of debug output: serial number,
+ * optional predicate, opcode name, results and then the sources.
+ */
+void
+nvc0_print_instruction(struct nv_instruction *i)
+{
+   int s;
+
+   PRINT("%i: ", i->serial);
+
+   /* predicate: "fl" if i->cc is set, "tr" otherwise */
+   if (i->predicate >= 0) {
+      PRINT("%s%s", gree, i->cc ? "fl" : "tr");
+      nv_print_ref(i->src[i->predicate], NULL, NV_TYPE_U8);
+      PRINT(" ");
+   }
+
+   PRINT("%s", gree);
+   if (NV_BASEOP(i->opcode) == NV_OP_SET)
+      PRINT("set %s", nv_cond_name(i->set_cond));
+   else
+   if (i->saturate)
+      PRINT("sat %s", nvc0_opcode_name(i->opcode));
+   else
+      PRINT("%s", nvc0_opcode_name(i->opcode));
+
+   /* first result, or the branch target, or '#' if there is neither.
+    * NOTE(review): for NV_OP_CVT def[0] is used without the NULL check the
+    * next branch applies - presumably CVT always has a result; confirm.
+    */
+   if (i->opcode == NV_OP_CVT)
+      nv_print_value(i->def[0], NULL, i->ext.cvt.d);
+   else
+   if (i->def[0])
+      nv_print_value(i->def[0], NULL, NV_OPTYPE(i->opcode));
+   else
+   if (i->target)
+      PRINT(" %s(BB:%i)", yllw, i->target->id);
+   else
+      PRINT(" #");
+
+   /* remaining results; the separator is only printed if there were any */
+   for (s = 1; s < 4 && i->def[s]; ++s)
+      nv_print_value(i->def[s], NULL, NV_OPTYPE(i->opcode));
+   if (s > 1)
+      PRINT("%s ,", norm);
+
+   for (s = 0; s < 6 && i->src[s]; ++s) {
+      ubyte type;
+      /* the address and predicate sources are printed with their users */
+      if (s == i->indirect || s == i->predicate)
+         continue;
+      if (i->opcode == NV_OP_CVT)
+         type = i->ext.cvt.s;
+      else
+         type = NV_OPTYPE(i->opcode);
+
+      if (i->src[s]->mod)
+         PRINT(" %s%s", gree, nv_modifier_string(i->src[s]->mod));
+
+      /* memory operands get the indirect address register attached */
+      if (i->indirect >= 0 &&
+          NV_IS_MEMORY_FILE(i->src[s]->value->reg.file))
+         nv_print_ref(i->src[s], i->src[i->indirect]->value, type);
+      else
+         nv_print_ref(i->src[s], NULL, type);
+   }
+   PRINT(" %s\n", norm);
+}
+
+/* Both sign modifiers; parenthesized so that the expansion stays a single
+ * operand when used next to other operators such as & or ~.
+ */
+#define NV_MOD_SGN (NV_MOD_ABS | NV_MOD_NEG)
+
+/* Static description of every opcode. Entries are positional: the entry
+ * for an opcode is found at the index equal to the opcode's value (see
+ * nvc0_opcode_name()), and the last entry is the fallback for invalid
+ * opcodes. Each entry starts with the base operation, the printable name,
+ * the default data type and the mask of supported source modifiers.
+ *
+ * NOTE(review): in the integer min/max group the names and types read
+ * max/s32, max/u32, min/s32, min/u32, while the base ops of the second and
+ * third row are NV_OP_MIN and NV_OP_MAX - this looks swapped; confirm
+ * against the opcode enumeration before changing it.
+ */
+struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
+{
+   { NV_OP_UNDEF,  "undef",  NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+   { NV_OP_BIND,   "bind",   NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
+   { NV_OP_MERGE,  "merge",  NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 1, 0, 1, 0, 0 },
+   { NV_OP_PHI,    "phi",    NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+   { NV_OP_SELECT, "select", NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 1, 0, 0 },
+   { NV_OP_NOP,    "nop",    NV_TYPE_ANY, 0, /* fcvpoi */ 0, 0, 0, 0, 0, 0, 0 },
+
+   { NV_OP_LD,  "ld",  NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_ST,  "st",  NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
+   { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_OR,  "or",  NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
+   { NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
+   { NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 },
+   { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 1, 2 },
+   { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 },
+   { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
+   { NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
+   { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
+   { NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_CEIL,  "ceil",  NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_FLOOR, "floor", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+   /* printed as "trunc" so that it can be told apart from floor in dumps */
+   { NV_OP_TRUNC, "trunc", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_SAD, "sad", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+
+   { NV_OP_VFETCH,  "vfetch",  NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_PFETCH,  "pfetch",  NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_EXPORT,  "export",  NV_TYPE_ANY, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_LINTERP, "linterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_PINTERP, "pinterp", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_EMIT,    "emit",    NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_RESTART, "restart", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_TEX, "tex",      NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_TXB, "texbias",  NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_TXL, "texlod",   NV_TYPE_F32, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_TXF, "texfetch", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
+   { NV_OP_TXQ, "texquery", NV_TYPE_U32, 0, 0, 0, 1, 1, 0, 0, 0 },
+
+   { NV_OP_QUADOP, "quadop", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_DFDX,   "dfdx",   NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_DFDY,   "dfdy",   NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_KIL,  "kil",  NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_BRA,  "bra",  NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_CALL, "call", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_RET,  "ret",  NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_RET,  "exit", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_NOP,  "ud",   NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_NOP,  "ud",   NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_JOINAT, "joinat", NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_JOIN,   "join",   NV_TYPE_ANY, 0, 1, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_ADD, "add", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_MUL, "mul", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 1, 0 },
+   { NV_OP_ABS, "abs", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_NEG, "neg", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_MAX, "max", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+   { NV_OP_MIN, "max", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
+   { NV_OP_MAX, "min", NV_TYPE_S32, 0, 0, 1, 0, 1, 0, 0, 0 },
+   { NV_OP_MIN, "min", NV_TYPE_U32, 0, 0, 1, 0, 1, 0, 0, 0 },
+   { NV_OP_SET, "set", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+   { NV_OP_SET, "set", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SET, "set", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SHR, "sar", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 1, 0 },
+   { NV_OP_RCP, "rcp", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_RSQ, "rsqrt", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_LG2, "lg2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_SIN, "sin", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_COS, "cos", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_EX2, "ex2", NV_TYPE_F32, 0, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_PRESIN, "presin", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_PREEX2, "preex2", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 0, 0, 0, 0 },
+   { NV_OP_SAT, "sat", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_SET_F32_AND, "and set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SET_F32_OR,  "or set",  NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SET_F32_XOR, "xor set", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_SELP, "selp", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_SLCT_F32, "slct", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SLCT_F32, "slct", NV_TYPE_S32, 0, 0, 0, 0, 1, 0, 0, 0 },
+   { NV_OP_SLCT_F32, "slct", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
+
+   { NV_OP_ADD, "sub", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 1, 0 },
+
+   { NV_OP_FSET_F32, "fset", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 2 },
+
+   { NV_OP_UNDEF, "BAD_OP", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c
new file mode 100644 (file)
index 0000000..6f9d5de
--- /dev/null
@@ -0,0 +1,925 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define NOUVEAU_DEBUG 1
+
+/* #define NVC0_RA_DEBUG_LIVEI */
+/* #define NVC0_RA_DEBUG_LIVE_SETS */
+/* #define NVC0_RA_DEBUG_JOIN */
+
+#include "nvc0_pc.h"
+#include "util/u_simple_list.h"
+
+#define NVC0_NUM_REGISTER_FILES 3
+
+/* Occupancy state of the register files during allocation.
+ *
+ * @bits: per file, a 64 bit map with one bit per allocation unit in use
+ * @last: per file, the highest position reg_assign() may hand out
+ * @log2_unit: log2 of min allocation unit for register
+ * @pc: program being compiled; its max_reg[] is updated on allocation
+ */
+struct register_set {
+   uint32_t bits[NVC0_NUM_REGISTER_FILES][2];
+   uint32_t last[NVC0_NUM_REGISTER_FILES];
+   int log2_unit[NVC0_NUM_REGISTER_FILES];
+   struct nv_pc *pc;
+};
+
+/* Context shared by the register allocation passes. */
+struct nv_pc_pass {
+   struct nv_pc *pc;
+   struct nv_instruction **insns; /* instruction pointers, num_insns used */
+   uint num_insns;
+   uint pass_seq;
+};
+
+/* Merge @range with all directly following ranges it touches or overlaps,
+ * freeing the absorbed list nodes.
+ */
+static void
+ranges_coalesce(struct nv_range *range)
+{
+   struct nv_range *succ;
+
+   for (succ = range->next;
+        succ && range->end >= succ->bgn;
+        succ = range->next) {
+      assert(range->bgn <= succ->bgn);
+
+      if (succ->end > range->end)
+         range->end = succ->end;
+
+      /* unlink before releasing the absorbed node */
+      range->next = succ->next;
+      FREE(succ);
+   }
+}
+
+/* Add the interval [@bgn, @end) to the sorted live interval list of @val.
+ *
+ * If the interval overlaps an existing range, that range is extended (and
+ * merged with its successors where necessary) and TRUE is returned; in
+ * this case @new_range is not used and remains owned by the caller.
+ *
+ * Otherwise a new list node is inserted at the right position and FALSE is
+ * returned. The node is @new_range if given, or freshly allocated if
+ * @new_range is NULL.
+ */
+static boolean
+add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
+{
+   struct nv_range **link = &val->livei;
+   struct nv_range *cur;
+
+   /* stop at the first range that lies entirely behind the new interval */
+   while ((cur = *link) != NULL && !(end < cur->bgn)) {
+      if (bgn > cur->end) {
+         /* entirely in front of the new interval, keep looking */
+         link = &cur->next;
+         continue;
+      }
+
+      /* overlap: grow the existing range as far as needed */
+      if (bgn < cur->bgn || end > cur->end) {
+         if (bgn < cur->bgn)
+            cur->bgn = bgn;
+         if (end > cur->end)
+            cur->end = end;
+         ranges_coalesce(cur);
+      } else {
+         /* already fully covered */
+         assert(bgn >= cur->bgn);
+         assert(end <= cur->end);
+      }
+      return TRUE;
+   }
+
+   if (!new_range)
+      new_range = CALLOC_STRUCT(nv_range);
+
+   new_range->bgn = bgn;
+   new_range->end = end;
+   new_range->next = cur;
+   *link = new_range;
+   return FALSE;
+}
+
+/* Mark @val as live inside block @b up to position @end.
+ *
+ * The interval starts at the serial of the defining instruction if that
+ * lies within @b, and at the entry of @b otherwise. Values without a
+ * defining instruction are ignored.
+ */
+static void
+add_range(struct nv_value *val, struct nv_basic_block *b, int end)
+{
+   int start;
+
+   /* ignore non-def values */
+   if (!val->insn)
+      return;
+
+   assert(b->entry->serial <= b->exit->serial);
+   assert(b->phi->serial <= end);
+   assert(b->exit->serial + 1 >= end);
+
+   start = val->insn->serial;
+   if (!(start >= b->entry->serial && start <= b->exit->serial))
+      start = b->entry->serial; /* defined in another block */
+
+   assert(start <= end);
+
+   add_range_ex(val, start, end, NULL);
+}
+
+#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
+/* Debug helper: dump the live interval list of @a on one line. */
+static void
+livei_print(struct nv_value *a)
+{
+   struct nv_range *r;
+
+   debug_printf("livei %i: ", a->n);
+   for (r = a->livei; r; r = r->next)
+      debug_printf("[%i, %i) ", r->bgn, r->end);
+   debug_printf("\n");
+}
+#endif
+
+/* Move all live ranges of @src into the live interval of @dst.
+ * List nodes of @src are reused where a new node is needed and freed where
+ * the range was merged into an existing one; @src ends up without ranges.
+ */
+static void
+livei_unify(struct nv_value *dst, struct nv_value *src)
+{
+   struct nv_range *cur = src->livei;
+
+   while (cur) {
+      /* cur is either relinked into dst or freed, so save its successor */
+      struct nv_range *rest = cur->next;
+
+      if (add_range_ex(dst, cur->bgn, cur->end, cur))
+         FREE(cur);
+      cur = rest;
+   }
+   src->livei = NULL;
+}
+
+/* Free all live ranges of @val and leave it with an empty interval list. */
+static void
+livei_release(struct nv_value *val)
+{
+   struct nv_range *range, *next;
+
+   for (range = val->livei; range; range = next) {
+      next = range->next;
+      FREE(range);
+   }
+   /* do not leave a dangling pointer behind: other code tests val->livei
+    * and a second release must not free the nodes again
+    */
+   val->livei = NULL;
+}
+
+/* TRUE if any live range of @a intersects any live range of @b.
+ * Ranges are half-open, so ranges that merely touch do not overlap.
+ */
+static boolean
+livei_have_overlap(struct nv_value *a, struct nv_value *b)
+{
+   struct nv_range *ra, *rb;
+
+   for (ra = a->livei; ra; ra = ra->next) {
+      for (rb = b->livei; rb; rb = rb->next) {
+         if (rb->bgn >= ra->end)
+            continue; /* rb starts after ra is over */
+         if (rb->end <= ra->bgn)
+            continue; /* rb is over before ra starts */
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+/* Return the end of the last live range of @a, which must have at least
+ * one range.
+ */
+static int
+livei_end(struct nv_value *a)
+{
+   struct nv_range *last = a->livei;
+
+   assert(last);
+   for (; last->next; last = last->next)
+      ;
+   return last->end;
+}
+
+/* TRUE if position @pos lies inside one of the live ranges of @a. */
+static boolean
+livei_contains(struct nv_value *a, int pos)
+{
+   struct nv_range *r = a->livei;
+
+   /* only ranges beginning at or before pos can contain it */
+   while (r && r->bgn <= pos) {
+      if (pos < r->end)
+         return TRUE;
+      r = r->next;
+   }
+   return FALSE;
+}
+
+/* Find free, consecutive registers for the @n values in @def (all taken to
+ * be of the file and size of def[0]), mark them as occupied in @set and
+ * assign consecutive ids to those values that have a live interval.
+ *
+ * Returns TRUE on success and FALSE if no suitable slot was found.
+ */
+static boolean
+reg_assign(struct register_set *set, struct nv_value **def, int n)
+{
+   int i, id, s, k;
+   uint32_t m;
+   int f = def[0]->reg.file;
+
+   /* a group of 3 reserves room for 4 */
+   k = n;
+   if (k == 3)
+      k = 4;
+   /* s: size of the group in allocation units, m: mask of s bits */
+   s = (k * def[0]->reg.size) >> set->log2_unit[f];
+   m = (1 << s) - 1;
+
+   id = set->last[f];
+
+   /* search word by word, stepping in multiples of the group size */
+   for (i = 0; i * 32 < set->last[f]; ++i) {
+      if (set->bits[f][i] == 0xffffffff)
+         continue;
+
+      for (id = 0; id < 32; id += s)
+         if (!(set->bits[f][i] & (m << id)))
+            break;
+      if (id < 32)
+         break;
+   }
+   /* nothing free within the allowed range */
+   if (i * 32 + id > set->last[f])
+      return FALSE;
+
+   set->bits[f][i] |= m << id;
+
+   id += i * 32;
+
+   set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
+
+   /* values without live interval get no id but also consume none */
+   for (i = 0; i < n; ++i)
+      if (def[i]->livei)
+         def[i]->reg.id = id++;
+
+   return TRUE;
+}
+
+/* Mark the register already assigned to @val as occupied in @set and raise
+ * the highest-used-register record of its file to at least that id.
+ * Does nothing if @val has no register (negative id).
+ */
+static INLINE void
+reg_occupy(struct register_set *set, struct nv_value *val)
+{
+   const int f = val->reg.file;
+   const int id = val->reg.id;
+   uint32_t mask;
+
+   if (id < 0)
+      return;
+
+   /* one bit per allocation unit covered by the value */
+   mask = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
+   set->bits[f][id / 32] |= mask << (id % 32);
+
+   if (id > set->pc->max_reg[f])
+      set->pc->max_reg[f] = id;
+}
+
+/* Mark the register assigned to @val as free again in @set.
+ * Does nothing if @val has no register (negative id).
+ */
+static INLINE void
+reg_release(struct register_set *set, struct nv_value *val)
+{
+   const int f = val->reg.file;
+   const int id = val->reg.id;
+   uint32_t mask;
+
+   if (id < 0)
+      return;
+
+   /* one bit per allocation unit covered by the value */
+   mask = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
+   set->bits[f][id / 32] &= ~(mask << (id % 32));
+}
+
+/* Check whether the join groups of @a and @b may be merged as far as
+ * register assignments are concerned (live interval overlap between the
+ * two groups themselves is checked by the caller).
+ *
+ * Returns FALSE if file or size differ, if both groups already have
+ * different registers, if the assigned register is 63, or if another group
+ * sharing the assigned register is live at the same time as the
+ * unassigned group.
+ */
+static INLINE boolean
+join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   int i;
+   struct nv_value *val;
+
+   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
+      return FALSE;
+
+   if (a->join->reg.id == b->join->reg.id)
+      return TRUE;
+
+   /* either a or b or both have been assigned */
+
+   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
+      return FALSE;
+   else
+   if (b->join->reg.id >= 0) {
+      /* NOTE(review): id 63 is never joined - presumably a special
+       * register; confirm
+       */
+      if (b->join->reg.id == 63)
+         return FALSE;
+      /* swap, so that a is the one with the register */
+      val = a;
+      a = b;
+      b = val;
+   } else
+   if (a->join->reg.id == 63)
+      return FALSE;
+
+   /* every other group using a's register must not be live together with
+    * b's group
+    */
+   for (i = 0; i < ctx->pc->num_values; ++i) {
+      val = &ctx->pc->values[i];
+
+      if (val->join->reg.id != a->join->reg.id)
+         continue;
+      if (val->join != a->join && livei_have_overlap(val->join, b->join))
+         return FALSE;
+   }
+   return TRUE;
+}
+
+/* Unconditionally merge the join group of @b into the join group of @a:
+ * a register already assigned to b's group is taken over, the live
+ * intervals are united and every value that had b's representative now
+ * gets a's representative.
+ */
+static INLINE void
+do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   struct nv_value *const old_rep = b->join;
+   int n;
+
+   if (old_rep->reg.id >= 0)
+      a->join->reg.id = old_rep->reg.id;
+
+   livei_unify(a->join, old_rep);
+
+#ifdef NVC0_RA_DEBUG_JOIN
+   debug_printf("joining %i to %i\n", b->n, a->n);
+#endif
+
+   /* make a->join the new representative */
+   for (n = 0; n < ctx->pc->num_values; ++n) {
+      struct nv_value *member = &ctx->pc->values[n];
+
+      if (member->join == old_rep)
+         member->join = a->join;
+   }
+
+   assert(b->join == a->join);
+}
+
+/* Merge the join groups of @a and @b if join_allowed() permits it and
+ * their live intervals do not overlap; otherwise leave both untouched.
+ */
+static INLINE void
+try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   if (join_allowed(ctx, a, b)) {
+      if (!livei_have_overlap(a->join, b->join)) {
+         do_join_values(ctx, a, b);
+         return;
+      }
+#ifdef NVC0_RA_DEBUG_JOIN
+      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
+      livei_print(a);
+      livei_print(b);
+#endif
+   } else {
+#ifdef NVC0_RA_DEBUG_JOIN
+      debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
+#endif
+   }
+}
+
+/* TRUE if a new block has to be inserted on the edge from @p to @b, which
+ * is the case if @b has more than one predecessor and @p has two outgoing
+ * edges that are not loop edges.
+ */
+static INLINE boolean
+need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
+{
+   int fwd_edges = 0;
+   int e;
+
+   for (e = 0; e < 2; ++e) {
+      if (!p->out[e])
+         continue;
+      if (IS_LOOP_EDGE(p->out_kind[e]))
+         continue;
+      ++fwd_edges;
+   }
+
+   return (b->num_in > 1) && (fwd_edges == 2);
+}
+
+/* Select the source of @phi that belongs to predecessor block @b.
+ *
+ * A source qualifies if nvc0_bblock_reachable_by(@b, <its defining block>,
+ * @tb) holds. Among the qualifying sources, a later one replaces the
+ * current choice unless the same check holds between the current choice's
+ * defining block and its own.
+ *
+ * Returns the source index, or -1 if no source qualifies.
+ */
+static int
+phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
+                struct nv_basic_block *tb)
+{
+   int best = -1;
+   int s;
+
+   for (s = 0; s < 6 && phi->src[s]; ++s) {
+      struct nv_basic_block *def_bb = phi->src[s]->value->insn->bb;
+
+      if (!nvc0_bblock_reachable_by(b, def_bb, tb))
+         continue;
+
+      /* NOTE: back-edges are ignored by the reachable-by check */
+      if (best >= 0 &&
+          nvc0_bblock_reachable_by(phi->src[best]->value->insn->bb,
+                                   def_bb, tb))
+         continue;
+
+      best = s;
+   }
+   return best;
+}
+
+/* For each operand of each PHI in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with its
+ * result. This eliminates liveness conflicts and enables us to let values be
+ * copied to the right register if such a conflict exists nonetheless.
+ *
+ * These MOVs are also crucial in making sure the live intervals of phi srces
+ * are extended until the end of the loop, since they are not included in the
+ * live-in sets.
+ *
+ * Where need_new_else_block() says so, a new block is inserted on the edge
+ * from the predecessor to @b to hold the MOVs. The function then recurses
+ * into the successors of @b that have not been visited in this pass.
+ * Always returns 0.
+ */
+static int
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i, *ni;
+   struct nv_value *val;
+   struct nv_basic_block *p, *pn;
+   int n, j;
+
+   /* mark as visited */
+   b->pass_seq = ctx->pc->pass_seq;
+
+   for (n = 0; n < b->num_in; ++n) {
+      /* p: original predecessor, pn: block that receives the MOVs */
+      p = pn = b->in[n];
+      assert(p);
+
+      if (need_new_else_block(b, p)) {
+         pn = new_basic_block(ctx->pc);
+
+         /* splice pn into the edge p -> b */
+         if (p->out[0] == b)
+            p->out[0] = pn;
+         else
+            p->out[1] = pn;
+
+         if (p->exit->target == b) /* target to new else-block */
+            p->exit->target = pn;
+
+         b->in[n] = pn;
+
+         pn->out[0] = b;
+         pn->in[0] = p;
+         pn->num_in = 1;
+      }
+      /* new instructions are created in the current block */
+      ctx->pc->current_block = pn;
+
+      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+         if ((j = phi_opnd_for_bb(i, p, b)) < 0)
+            continue;
+         val = i->src[j]->value;
+
+         if (i->src[j]->flags) {
+            /* value already encountered from a different in-block */
+            val = val->insn->src[0]->value;
+            /* use the first free source slot for the additional MOV */
+            while (j < 6 && i->src[j])
+               ++j;
+            assert(j < 6);
+         }
+
+         ni = new_instruction(ctx->pc, NV_OP_MOV);
+
+         /* TODO: insert instruction at correct position in the first place */
+         if (ni->prev && ni->prev->target)
+            nvc0_insns_permute(ni->prev, ni);
+
+         ni->def[0] = new_value_like(ctx->pc, val);
+         ni->def[0]->insn = ni;
+         nv_reference(ctx->pc, ni, 0, val);
+         nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
+         i->src[j]->flags = 1;
+      }
+
+      /* the new block got MOVs: add a branch to b.
+       * NOTE(review): the branch is created in b->in[n ? 0 : 1], i.e. the
+       * other predecessor of b, not in pn - confirm this is intended.
+       */
+      if (pn != p && pn->exit) {
+         ctx->pc->current_block = b->in[n ? 0 : 1];
+         ni = new_instruction(ctx->pc, NV_OP_BRA);
+         ni->target = b;
+         ni->terminator = 1;
+      }
+   }
+
+   /* continue with the successors not yet visited in this pass */
+   for (j = 0; j < 2; ++j)
+      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
+         pass_generate_phi_movs(ctx, b->out[j]);
+
+   return 0;
+}
+
+/* Walk all ordered instructions (ctx->insns) and join the values of selected
+ * opcodes. Which opcodes are handled depends on iter:
+ *   iter == 0: TEX, TXB, TXL, TXQ and BIND (def[c] joined with src[c])
+ *   iter == 1: SELECT (def[0] joined with every source, join must be allowed)
+ *   iter == 2: PHI and MOV (join attempted via try_join_values)
+ * Always returns 0.
+ */
+static int
+pass_join_values(struct nv_pc_pass *ctx, int iter)
+{
+   int c, n;
+
+   for (n = 0; n < ctx->num_insns; ++n) {
+      struct nv_instruction *i = ctx->insns[n];
+
+      switch (i->opcode) {
+      case NV_OP_PHI:
+         if (iter != 2)
+            break;
+         for (c = 0; c < 6 && i->src[c]; ++c)
+            try_join_values(ctx, i->def[0], i->src[c]->value);
+         break;
+      case NV_OP_MOV:
+         /* NOTE(review): the guard tests value->insn but the dereference
+          * goes through value->join->insn - confirm that join->insn cannot
+          * be NULL when value->insn is set.
+          */
+         if ((iter == 2) && i->src[0]->value->insn &&
+             !nv_is_texture_op(i->src[0]->value->join->insn->opcode))
+            try_join_values(ctx, i->def[0], i->src[0]->value);
+         break;
+      case NV_OP_SELECT:
+         if (iter != 1)
+            break;
+         for (c = 0; c < 6 && i->src[c]; ++c) {
+            /* unconditional join; only checked in builds with asserts */
+            assert(join_allowed(ctx, i->def[0], i->src[c]->value));
+            do_join_values(ctx, i->def[0], i->src[c]->value);
+         }
+         break;
+      case NV_OP_TEX:
+      case NV_OP_TXB:
+      case NV_OP_TXL:
+      case NV_OP_TXQ:
+      case NV_OP_BIND:
+         if (iter)
+            break;
+         /* pairwise: the c-th def is joined with the c-th source */
+         for (c = 0; c < 6 && i->src[c]; ++c)
+            do_join_values(ctx, i->def[c], i->src[c]->value);
+         break;
+      default:
+         break;
+      }
+   }
+   return 0;
+}
+
+/* Block callback: append every instruction of b (starting at b->phi) to the
+ * pass' instruction array and use the array position as its serial number,
+ * so that live intervals can be expressed in numbers.
+ */
+static void
+pass_order_instructions(void *priv, struct nv_basic_block *b)
+{
+   struct nv_pc_pass *pass = (struct nv_pc_pass *)priv;
+   struct nv_instruction *insn;
+
+   b->pass_seq = pass->pc->pass_seq;
+
+   assert(!b->exit || !b->exit->next);
+
+   insn = b->phi;
+   while (insn) {
+      insn->serial = pass->num_insns;
+      pass->insns[pass->num_insns] = insn;
+      pass->num_insns++;
+      insn = insn->next;
+   }
+}
+
+/* Debug helper: print the numbers of all values in b's live set that have a
+ * defining instruction. Compiles to nothing unless NVC0_RA_DEBUG_LIVE_SETS
+ * is defined.
+ */
+static void
+bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
+{
+#ifdef NVC0_RA_DEBUG_LIVE_SETS
+   int idx;
+
+   debug_printf("LIVE-INs of BB:%i: ", b->id);
+
+   for (idx = 0; idx < pc->num_values; ++idx) {
+      struct nv_value *v = &pc->values[idx];
+
+      if ((b->live_set[idx / 32] & (1 << (idx % 32))) && v->insn)
+         debug_printf("%i ", v->n);
+   }
+   debug_printf("\n");
+#endif
+}
+
+/* Set the live bit of val in b's live set. Values without a defining
+ * instruction are ignored.
+ */
+static INLINE void
+live_set_add(struct nv_basic_block *b, struct nv_value *val)
+{
+   if (!val->insn) /* don't add non-def values */
+      return;
+   /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+   b->live_set[val->n / 32] |= 1u << (val->n % 32);
+}
+
+/* Clear the live bit of val in b's live set. */
+static INLINE void
+live_set_rem(struct nv_basic_block *b, struct nv_value *val)
+{
+   /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+   b->live_set[val->n / 32] &= ~(1u << (val->n % 32));
+}
+
+/* Return TRUE if the value referenced by ref is in b's live set.
+ *
+ * The masked word is normalised to TRUE/FALSE explicitly: returning the raw
+ * 32-bit mask result would be truncated if boolean is a narrow type (such as
+ * unsigned char), reporting FALSE for bits above the type's width.
+ */
+static INLINE boolean
+live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
+{
+   int n = ref->value->n;
+   return (b->live_set[n / 32] & (1u << (n % 32))) ? TRUE : FALSE;
+}
+
+/* The live set of a block contains those values that are live immediately
+ * before the beginning of the block, so do a backwards scan.
+ *
+ * The successors are processed first (depth first, each block at most once
+ * per pass sequence number); their live sets are merged into b's set, which
+ * is then updated by walking b's instructions from exit to entry.
+ * Returns 0, or the first non-zero value returned by a recursive call.
+ */
+static int
+pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i;
+   int j, n, ret = 0;
+
+   /* already visited in this pass */
+   if (b->pass_seq >= ctx->pc->pass_seq)
+      return 0;
+   b->pass_seq = ctx->pc->pass_seq;
+
+   /* slight hack for undecidedness: set phi = entry if it's undefined */
+   if (!b->phi)
+      b->phi = b->entry;
+
+   for (n = 0; n < 2; ++n) {
+      /* missing successors and self-loops contribute nothing */
+      if (!b->out[n] || b->out[n] == b)
+         continue;
+      ret = pass_build_live_sets(ctx, b->out[n]);
+      if (ret)
+         return ret;
+
+      /* out[0] overwrites b's set, out[1] is OR-ed into what is there */
+      if (n == 0) {
+         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+            b->live_set[j] = b->out[n]->live_set[j];
+      } else {
+         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+            b->live_set[j] |= b->out[n]->live_set[j];
+      }
+   }
+
+   /* block without instructions: live-in equals the merged live-out */
+   if (!b->entry)
+      return 0;
+
+   bb_live_set_print(ctx->pc, b);
+
+   /* backwards: a definition kills the value, a use makes it live */
+   for (i = b->exit; i != b->entry->prev; i = i->prev) {
+      for (j = 0; j < 5 && i->def[j]; j++)
+         live_set_rem(b, i->def[j]);
+      for (j = 0; j < 6 && i->src[j]; j++)
+         live_set_add(b, i->src[j]->value);
+   }
+   /* results of b's PHIs are not live-in */
+   for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
+      live_set_rem(b, i->def[0]);
+
+   bb_live_set_print(ctx->pc, b);
+
+   return 0;
+}
+
+/* Initialise the first n words of b's live set from its successors: the
+ * union of both sets if there are two, a copy if there is one, all zero if
+ * there is none.
+ */
+static void collect_live_values(struct nv_basic_block *b, const int n)
+{
+   struct nv_basic_block *succ0 = b->out[0];
+   struct nv_basic_block *succ1 = b->out[1];
+   int w;
+
+   if (succ0 && succ1) { /* what to do about back-edges ? */
+      for (w = 0; w < n; ++w)
+         b->live_set[w] = succ0->live_set[w] | succ1->live_set[w];
+      return;
+   }
+
+   if (succ0)
+      memcpy(b->live_set, succ0->live_set, n * sizeof(uint32_t));
+   else if (succ1)
+      memcpy(b->live_set, succ1->live_set, n * sizeof(uint32_t));
+   else
+      memset(b->live_set, 0, n * sizeof(uint32_t));
+}
+
+/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
+static int
+pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i, *i_stop;
+   int j, s;
+   const int n = (ctx->pc->num_values + 31) / 32;
+
+   /* verify that first block does not have live-in values */
+   if (b->num_in == 0)
+      for (j = 0; j < n; ++j)
+         assert(b->live_set[j] == 0);
+
+   collect_live_values(b, n);
+
+   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
+   for (j = 0; j < 2; ++j) {
+      if (!b->out[j] || !b->out[j]->phi)
+         continue;
+      for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
+         live_set_rem(b, i->def[0]);
+
+         for (s = 0; s < 6 && i->src[s]; ++s) {
+            assert(i->src[s]->value->insn);
+            if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb,
+                                         b->out[j]))
+               live_set_add(b, i->src[s]->value);
+            else
+               live_set_rem(b, i->src[s]->value);
+         }
+      }
+   }
+
+   /* remaining live-outs are live until the end */
+   if (b->exit) {
+      for (j = 0; j < ctx->pc->num_values; ++j) {
+         if (!(b->live_set[j / 32] & (1 << (j % 32))))
+            continue;
+         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
+#ifdef NVC0_RA_DEBUG_LIVEI
+         debug_printf("adding range for live value %i: ", j);
+         livei_print(&ctx->pc->values[j]);
+#endif
+      }
+   }
+
+   i_stop = b->entry ? b->entry->prev : NULL;
+
+   /* don't have to include phi functions here (will have 0 live range) */
+   for (i = b->exit; i != i_stop; i = i->prev) {
+      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
+      for (j = 0; j < 4 && i->def[j]; ++j)
+         live_set_rem(b, i->def[j]);
+
+      for (j = 0; j < 6 && i->src[j]; ++j) {
+         if (!live_set_test(b, i->src[j])) {
+            live_set_add(b, i->src[j]->value);
+            add_range(i->src[j]->value, b, i->serial);
+#ifdef NVC0_RA_DEBUG_LIVEI
+            debug_printf("adding range for source %i (ends living): ",
+                         i->src[j]->value->n);
+            livei_print(i->src[j]->value);
+#endif
+         }
+      }
+   }
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
+      pass_build_intervals(ctx, b->out[0]);
+
+   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
+      pass_build_intervals(ctx, b->out[1]);
+
+   return 0;
+}
+
+/* Reset a register set: everything zeroed, then the per-file "last" and
+ * "log2_unit" values are filled in and the set is bound to pc.
+ */
+static INLINE void
+nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
+{
+   memset(set, 0, sizeof(*set));
+
+   set->pc = pc;
+
+   set->last[NV_FILE_GPR] = 62;
+   set->log2_unit[NV_FILE_GPR] = 2;
+
+   set->last[NV_FILE_PRED] = 6;
+   set->log2_unit[NV_FILE_PRED] = 0;
+
+   set->last[NV_FILE_COND] = 1;
+   set->log2_unit[NV_FILE_COND] = 0;
+}
+
+/* Insert nval into the circular list headed by list, which is kept sorted by
+ * ascending livei->bgn. The search starts at the tail, so nval ends up behind
+ * all elements that begin before or at the same time.
+ */
+static void
+insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+{
+   struct nv_value *pos = list->prev;
+   struct nv_value *after;
+
+   /* skip backwards over everything that begins later than nval */
+   while (pos != list && pos->livei->bgn > nval->livei->bgn)
+      pos = pos->prev;
+
+   /* link nval between pos and its former successor */
+   after = pos->next;
+   nval->prev = pos;
+   nval->next = after;
+   after->prev = nval;
+   pos->next = nval;
+}
+
+/* Linear scan register allocation over the live intervals built before.
+ *
+ * All defined values that own a live interval are put on the "unhandled"
+ * list, sorted by interval start, and processed in that order. "active"
+ * holds values whose interval covers the current position, "inactive" those
+ * in a hole of their interval, "handled" those whose interval has ended.
+ *
+ * Values with more than one vector component get registers in any
+ * iteration; single values only when iter is non-zero. Aborts the process if
+ * no register can be assigned. Otherwise returns 0.
+ */
+static int
+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+{
+   struct nv_instruction *i;
+   struct register_set f, free;
+   int k, n;
+   struct nv_value *cur, *val, *tmp[2];
+   struct nv_value active, inactive, handled, unhandled;
+
+   make_empty_list(&active);
+   make_empty_list(&inactive);
+   make_empty_list(&handled);
+   make_empty_list(&unhandled);
+
+   nvc0_ctor_register_set(ctx->pc, &free);
+
+   /* joined values should have range = NULL and thus not be added;
+    * also, fixed memory values won't be added because they're not
+    * def'd, just used
+    */
+   for (n = 0; n < ctx->num_insns; ++n) {
+      i = ctx->insns[n];
+
+      for (k = 0; k < 5; ++k) {
+         if (i->def[k] && i->def[k]->livei)
+            insert_ordered_tail(&unhandled, i->def[k]);
+         else
+         if (0 && i->def[k]) /* disabled debug output */
+            debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+      }
+   }
+
+   /* sanity check: list is sorted and contains only join representatives */
+   for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+      assert(val->join == val);
+      assert(val->livei->bgn <= val->next->livei->bgn);
+   }
+
+   foreach_s(cur, tmp[0], &unhandled) {
+      remove_from_list(cur);
+
+      /* retire or deactivate active values relative to cur's start */
+      foreach_s(val, tmp[1], &active) {
+         if (livei_end(val) <= cur->livei->bgn) {
+            reg_release(&free, val);
+            move_to_head(&handled, val);
+         } else
+         if (!livei_contains(val, cur->livei->bgn)) {
+            reg_release(&free, val);
+            move_to_head(&inactive, val);
+         }
+      }
+
+      /* retire or reactivate inactive values relative to cur's start */
+      foreach_s(val, tmp[1], &inactive) {
+         if (livei_end(val) <= cur->livei->bgn)
+            move_to_head(&handled, val);
+         else
+         if (livei_contains(val, cur->livei->bgn)) {
+            reg_occupy(&free, val);
+            move_to_head(&active, val);
+         }
+      }
+
+      /* f: working copy of the free set used for choosing cur's register */
+      f = free;
+
+      /* registers of overlapping inactive values are not available */
+      foreach(val, &inactive)
+         if (livei_have_overlap(val, cur))
+            reg_occupy(&f, val);
+
+      /* neither are those of overlapping unhandled values that already
+       * have a register (reg.id >= 0)
+       */
+      foreach(val, &unhandled)
+         if (val->reg.id >= 0 && livei_have_overlap(val, cur))
+            reg_occupy(&f, val);
+
+      if (cur->reg.id < 0) {
+         boolean mem = FALSE;
+         int v = nvi_vector_size(cur->insn);
+
+         /* vectors: assign all v defs of the instruction at once */
+         if (v > 1)
+            mem = !reg_assign(&f, &cur->insn->def[0], v);
+         else
+         if (iter)
+            mem = !reg_assign(&f, &cur, 1);
+
+         if (mem) {
+            NOUVEAU_ERR("out of registers\n");
+            abort();
+         }
+      }
+      insert_at_head(&active, cur);
+      /* NOTE(review): with iter == 0 a single value may still have
+       * reg.id < 0 here; presumably reg_occupy() ignores such values -
+       * confirm.
+       */
+      reg_occupy(&free, cur);
+   }
+
+   return 0;
+}
+
+/* Run register allocation for the code reachable from root.
+ *
+ * Sequence: insert MOVs for PHI operands, build the live sets (repeated
+ * pc->loop_nesting_bound times), number the instructions, build the live
+ * intervals, then alternate value joining and linear scan:
+ * join(0), scan(0), join(1), join(2), scan(1).
+ * pc->pass_seq is incremented before each CFG traversal so that the
+ * per-block "visited" stamps of the previous traversal become stale.
+ *
+ * Returns 0 on success, -1 if the pass context cannot be allocated, or the
+ * non-zero result of a failing sub-pass.
+ */
+static int
+nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
+{
+   struct nv_pc_pass *ctx;
+   int i, ret;
+
+   NOUVEAU_DBG("REGISTER ALLOCATION - entering\n");
+
+   ctx = CALLOC_STRUCT(nv_pc_pass);
+   if (!ctx)
+      return -1;
+   ctx->pc = pc;
+
+   /* filled by pass_order_instructions, indexed by instruction serial */
+   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
+   if (!ctx->insns) {
+      FREE(ctx);
+      return -1;
+   }
+
+   pc->pass_seq++;
+   ret = pass_generate_phi_movs(ctx, root);
+   assert(!ret);
+
+   /* one traversal per iteration, each with a fresh sequence number */
+   for (i = 0; i < pc->loop_nesting_bound; ++i) {
+      pc->pass_seq++;
+      ret = pass_build_live_sets(ctx, root);
+      assert(!ret && "live sets");
+      if (ret) {
+         NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
+         goto out;
+      }
+   }
+
+   pc->pass_seq++;
+   nvc0_pc_pass_in_order(root, pass_order_instructions, ctx);
+
+   pc->pass_seq++;
+   ret = pass_build_intervals(ctx, root);
+   assert(!ret && "build intervals");
+   if (ret) {
+      NOUVEAU_ERR("failed to build live intervals\n");
+      goto out;
+   }
+
+#ifdef NVC0_RA_DEBUG_LIVEI
+   for (i = 0; i < pc->num_values; ++i)
+      livei_print(&pc->values[i]);
+#endif
+
+   ret = pass_join_values(ctx, 0);
+   if (ret)
+      goto out;
+   ret = pass_linear_scan(ctx, 0);
+   if (ret)
+      goto out;
+   ret = pass_join_values(ctx, 1);
+   if (ret)
+      goto out;
+   ret = pass_join_values(ctx, 2);
+   if (ret)
+      goto out;
+   ret = pass_linear_scan(ctx, 1);
+   if (ret)
+      goto out;
+
+   /* NOTE(review): the live intervals are only released on the success
+    * path; the error exits above jump past this loop.
+    */
+   for (i = 0; i < pc->num_values; ++i)
+      livei_release(&pc->values[i]);
+
+   NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n");
+
+out:
+   FREE(ctx->insns);
+   FREE(ctx);
+   return ret;
+}
+
+/* Run nv_pc_pass1 on the main program and on every subroutine that has a
+ * root block. Stops at, and returns, the first non-zero result; returns 0 if
+ * all of them succeed.
+ */
+int
+nvc0_pc_exec_pass1(struct nv_pc *pc)
+{
+   int sub;
+
+   for (sub = 0; sub < pc->num_subroutines + 1; ++sub) {
+      int err;
+
+      if (!pc->root[sub])
+         continue;
+
+      err = nv_pc_pass1(pc, pc->root[sub]);
+      if (err)
+         return err;
+   }
+   return 0;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
new file mode 100644 (file)
index 0000000..8cc161a
--- /dev/null
@@ -0,0 +1,595 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "nvc0_context.h"
+#include "nvc0_pc.h"
+
+#define NOUVEAU_DEBUG_BITS 1
+
+/* Return the mask of components that instruction inst reads from its source
+ * operand number c. For opcodes without a special rule this is the write
+ * mask of the first destination.
+ */
+static unsigned
+nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
+{
+   const unsigned opcode = inst->Instruction.Opcode;
+   const unsigned wmask = inst->Dst[0].Register.WriteMask;
+   unsigned rd;
+
+   switch (opcode) {
+   case TGSI_OPCODE_COS:
+   case TGSI_OPCODE_SIN:
+      rd = (wmask & 0x7) ? 0x1 : 0x0;
+      return (wmask & 0x8) | rd;
+   case TGSI_OPCODE_DP3:
+      return 0x7;
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_DPH:
+   case TGSI_OPCODE_KIL: /* WriteMask ignored */
+      return 0xf;
+   case TGSI_OPCODE_DST:
+      return wmask & (c ? 0xa : 0x6);
+   case TGSI_OPCODE_EX2:
+   case TGSI_OPCODE_EXP:
+   case TGSI_OPCODE_LG2:
+   case TGSI_OPCODE_LOG:
+   case TGSI_OPCODE_POW:
+   case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_SCS:
+   case TGSI_OPCODE_IF:
+      return 0x1;
+   case TGSI_OPCODE_LIT:
+      return 0xb;
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXP:
+      assert(inst->Instruction.Texture);
+
+      /* xyz always; w too (bias, lod or proj) unless the opcode is TEX/TXD */
+      if (opcode == TGSI_OPCODE_TEX || opcode == TGSI_OPCODE_TXD)
+         rd = 0x7;
+      else
+         rd = 0xf;
+
+      /* restrict by texture target */
+      switch (inst->Texture.Texture) {
+      case TGSI_TEXTURE_1D:
+         return rd & 0x9;
+      case TGSI_TEXTURE_SHADOW1D:
+         return rd & 0x5;
+      case TGSI_TEXTURE_2D:
+         return rd & 0xb;
+      default:
+         return rd;
+      }
+   case TGSI_OPCODE_XPD:
+      /* each written component needs the two other source components */
+      rd = 0;
+      if (wmask & 1)
+         rd |= 0x6;
+      if (wmask & 2)
+         rd |= 0x5;
+      if (wmask & 4)
+         rd |= 0x3;
+      return rd;
+   default:
+      return wmask;
+   }
+}
+
+/* Record instruction id as accessing every component of every shader input
+ * and flag the program as using indirect input addressing.
+ */
+static void
+nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
+{
+   int idx;
+
+   for (idx = 0; idx < PIPE_MAX_SHADER_INPUTS; ++idx) {
+      int *access = ti->input_access[idx];
+
+      access[0] = id;
+      access[1] = id;
+      access[2] = id;
+      access[3] = id;
+   }
+
+   ti->indirect_inputs = TRUE;
+}
+
+/* Record instruction id as accessing every component of every shader output
+ * and flag the program as using indirect output addressing.
+ */
+static void
+nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
+{
+   int idx;
+
+   for (idx = 0; idx < PIPE_MAX_SHADER_OUTPUTS; ++idx) {
+      int *access = ti->output_access[idx];
+
+      access[0] = id;
+      access[1] = id;
+      access[2] = id;
+      access[3] = id;
+   }
+
+   ti->indirect_outputs = TRUE;
+}
+
+/* Map a system value semantic name (sn) to its location constant.
+ * Only PRIMID and INSTANCEID are handled; the other cases are commented out.
+ * Any other name triggers an assertion and yields 0. The semantic index (si)
+ * is not used.
+ */
+static INLINE unsigned
+nvc0_system_value_location(unsigned sn, unsigned si)
+{
+   switch (sn) {
+      /*
+   case TGSI_SEMANTIC_VERTEXID:
+      return 0x2fc;
+      */
+   case TGSI_SEMANTIC_PRIMID:
+      return 0x60;
+      /*
+   case TGSI_SEMANTIC_LAYER_INDEX:
+      return 0x64;
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+      return 0x68;
+      */
+   case TGSI_SEMANTIC_INSTANCEID:
+      return 0x2f8;
+   default:
+      assert(0);
+      return 0x000;
+   }
+}
+
+/* Map a varying's semantic name (sn) and semantic index (si) to the location
+ * of its first component. COLOR, BCOLOR and GENERIC advance by 16 per index;
+ * the other semantics ignore si. Unknown names trigger an assertion and
+ * yield 0.
+ */
+static INLINE unsigned
+nvc0_varying_location(unsigned sn, unsigned si)
+{
+   switch (sn) {
+   case TGSI_SEMANTIC_POSITION:
+      return 0x70;
+   case TGSI_SEMANTIC_COLOR:
+      return 0x280 + (si * 16); /* are these hard-wired ? */
+   case TGSI_SEMANTIC_BCOLOR:
+      return 0x2a0 + (si * 16);
+   case TGSI_SEMANTIC_FOG:
+      return 0x270;
+   case TGSI_SEMANTIC_PSIZE:
+      return 0x6c;
+      /*
+   case TGSI_SEMANTIC_PNTC:
+      return 0x2e0;
+      */
+   case TGSI_SEMANTIC_GENERIC:
+      /* only checked in builds with asserts enabled */
+      assert(si < 31);
+      return 0x80 + (si * 16);
+   case TGSI_SEMANTIC_NORMAL:
+      return 0x360;
+   case TGSI_SEMANTIC_PRIMID:
+      return 0x40;
+      /*
+   case TGSI_SEMANTIC_CLIP_DISTANCE:
+      return 0x2c0 + (si * 4);
+      */
+   default:
+      assert(0);
+      return 0x000;
+   }
+}
+
+/* Translate the interpolation settings of a declaration into
+ * NVC0_INTERP_* flags: FLAT for constant, PERSPECTIVE for perspective and
+ * LINEAR for everything else, with CENTROID OR-ed in if requested.
+ */
+static INLINE unsigned
+nvc0_interp_mode(const struct tgsi_full_declaration *decl)
+{
+   unsigned mode;
+
+   switch (decl->Declaration.Interpolate) {
+   case TGSI_INTERPOLATE_CONSTANT:
+      mode = NVC0_INTERP_FLAT;
+      break;
+   case TGSI_INTERPOLATE_PERSPECTIVE:
+      mode = NVC0_INTERP_PERSPECTIVE;
+      break;
+   default:
+      mode = NVC0_INTERP_LINEAR;
+      break;
+   }
+
+   return decl->Declaration.Centroid ? (mode | NVC0_INTERP_CENTROID) : mode;
+}
+
+/* Store one TGSI immediate: its four 32-bit words go into the next free
+ * 4-word slot of ti->immd32 and its data type into ti->immd32_ty.
+ */
+static void
+prog_immediate(struct nvc0_translation_info *ti,
+               const struct tgsi_full_immediate *imm)
+{
+   const unsigned slot = ti->immd32_nr;
+   uint32_t *words;
+   int c;
+
+   ti->immd32_nr = slot + 1;
+   assert(ti->immd32_nr <= ti->scan.immediate_count);
+
+   words = &ti->immd32[slot * 4];
+   for (c = 0; c < 4; ++c)
+      words[c] = imm->u[c].Uint;
+
+   ti->immd32_ty[slot] = imm->Immediate.DataType;
+}
+
+/* Process one TGSI declaration and record where the declared registers
+ * live.
+ *
+ * INPUT:  input_loc[][] for every register in the range (vertex shader
+ *         inputs are always treated as GENERIC with the register index as
+ *         semantic index), plus interp_mode[] for fragment shaders.
+ * OUTPUT: output_loc[][]; fragment shader outputs are numbered
+ *         consecutively with the depth output placed last, other shaders
+ *         use the varying location of the semantic.
+ * SYSTEM_VALUE: sysval_loc[] of the (single) declared register.
+ *
+ * Returns TRUE on success, FALSE for an unhandled register file.
+ */
+static boolean
+prog_decl(struct nvc0_translation_info *ti,
+          const struct tgsi_full_declaration *decl)
+{
+   unsigned i, c;
+   unsigned sn = TGSI_SEMANTIC_GENERIC;
+   unsigned si = 0;
+   const unsigned first = decl->Range.First;
+   const unsigned last = decl->Range.Last;
+
+   if (decl->Declaration.Semantic) {
+      sn = decl->Semantic.Name;
+      si = decl->Semantic.Index;
+   }
+
+   switch (decl->Declaration.File) {
+   case TGSI_FILE_INPUT:
+      for (i = first; i <= last; ++i) {
+         if (ti->prog->type == PIPE_SHADER_VERTEX) {
+            sn = TGSI_SEMANTIC_GENERIC;
+            si = i;
+         }
+         for (c = 0; c < 4; ++c)
+            ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+
+         if (ti->prog->type == PIPE_SHADER_FRAGMENT)
+            ti->interp_mode[i] = nvc0_interp_mode(decl);
+      }
+      break;
+   case TGSI_FILE_OUTPUT:
+      for (i = first; i <= last; ++i, ++si) {
+         if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
+            si = i;
+            if (i == ti->fp_depth_output) {
+               /* depth goes behind all colour outputs, z component only */
+               ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4;
+            } else {
+               /* outputs after the depth output move down by one slot */
+               if (i > ti->fp_depth_output)
+                  si -= 1;
+               for (c = 0; c < 4; ++c)
+                  ti->output_loc[i][c] = si * 4 + c;
+            }
+         } else {
+            for (c = 0; c < 4; ++c)
+               ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
+         }
+      }
+      break;
+   case TGSI_FILE_SYSTEM_VALUE:
+      /* Exactly one register per declaration; index the table with that
+       * register (the loop variable i is not initialised in this case).
+       */
+      assert(first == last);
+      assert(first < sizeof(ti->sysval_loc) / sizeof(ti->sysval_loc[0]));
+      ti->sysval_loc[first] = nvc0_system_value_location(sn, si);
+      break;
+   case TGSI_FILE_NULL:
+   case TGSI_FILE_CONSTANT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_IMMEDIATE:
+   case TGSI_FILE_PREDICATE:
+      break;
+   default:
+      NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
+      return FALSE;
+   }
+   return TRUE;
+}
+
+/* Analyse one TGSI instruction; id is its 1-based position in the program.
+ *
+ * Records: the start of a subroutine (BGNSUB), which output/input components
+ * the instruction accesses (output_access / input_access are set to id),
+ * indirect input/output addressing, indirect TEMP addressing
+ * (require_stores) and the input register that is MOVed to the edge flag
+ * output.
+ */
+static void
+prog_inst(struct nvc0_translation_info *ti,
+          const struct tgsi_full_instruction *inst, int id)
+{
+   const struct tgsi_dst_register *dst;
+   const struct tgsi_src_register *src;
+   int s, c, k;
+   unsigned mask;
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+      /* id - 1 is the 0-based index of the BGNSUB instruction itself */
+      ti->subr[ti->num_subrs].first_insn = id - 1;
+      ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
+      ++ti->num_subrs;
+   }
+
+   if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+      dst = &inst->Dst[0].Register;
+
+      for (c = 0; c < 4; ++c) {
+         /* indirect write: any output may be hit */
+         if (dst->Indirect)
+            nvc0_indirect_outputs(ti, id);
+         if (!(dst->WriteMask & (1 << c)))
+            continue;
+         ti->output_access[dst->Index][c] = id;
+      }
+
+      /* remember which input feeds the edge flag output */
+      if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
+          inst->Src[0].Register.File == TGSI_FILE_INPUT &&
+          dst->Index == ti->edgeflag_out)
+         ti->prog->vp.edgeflag = inst->Src[0].Register.Index;
+   } else
+   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+      if (inst->Dst[0].Register.Indirect)
+         ti->require_stores = TRUE;
+   }
+
+   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+      src = &inst->Src[s].Register;
+      if (src->File == TGSI_FILE_TEMPORARY)
+         if (inst->Src[s].Register.Indirect)
+            ti->require_stores = TRUE;
+      /* only shader inputs are tracked below */
+      if (src->File != TGSI_FILE_INPUT)
+         continue;
+      mask = nvc0_tgsi_src_mask(inst, s);
+
+      /* indirect read: any input may be hit */
+      if (inst->Src[s].Register.Indirect)
+         nvc0_indirect_inputs(ti, id);
+
+      for (c = 0; c < 4; ++c) {
+         if (!(mask & (1 << c)))
+            continue;
+         /* k: input component that is actually read through the swizzle */
+         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+         if (k <= TGSI_SWIZZLE_W)
+            ti->input_access[src->Index][k] = id;
+      }
+   }
+}
+
+/* Probably should introduce something like struct tgsi_function_declaration
+ * instead of trying to guess inputs/outputs.
+ *
+ * Update the argument/return bit sets of subr for one instruction of its
+ * body: a TEMP component that is read before the subroutine has written it
+ * is recorded as an argument (argv), every TEMP component that is written is
+ * recorded as a return value (retv). Both sets are indexed as
+ * [register / 32][component], with bit (register % 32) standing for the
+ * register.
+ */
+static void
+prog_subroutine_inst(struct nvc0_subroutine *subr,
+                     const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_dst_register *dst;
+   const struct tgsi_src_register *src;
+   int s, c, k;
+   unsigned mask;
+
+   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+      src = &inst->Src[s].Register;
+      if (src->File != TGSI_FILE_TEMPORARY)
+         continue;
+      mask = nvc0_tgsi_src_mask(inst, s);
+
+      for (c = 0; c < 4; ++c) {
+         /* k: TEMP component that is actually read through the swizzle */
+         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+
+         /* include W as well, matching prog_inst and the retv tracking */
+         if ((mask & (1 << c)) && k <= TGSI_SWIZZLE_W)
+            if (!(subr->retv[src->Index / 32][k] & (1u << (src->Index % 32))))
+               subr->argv[src->Index / 32][k] |= 1u << (src->Index % 32);
+      }
+   }
+
+   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+      dst = &inst->Dst[0].Register;
+
+      for (c = 0; c < 4; ++c)
+         if (dst->WriteMask & (1 << c))
+            subr->retv[dst->Index / 32][c] |= 1u << (dst->Index % 32);
+   }
+}
+
+/* Fill in the vertex program header: two fixed words plus one enable bit per
+ * accessed input component (from hdr[5] on) and per written output component
+ * (from hdr[13] on). Always returns 0.
+ */
+static int
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+{
+   int i, c;
+   unsigned a;
+
+   vp->hdr[0] = 0x20461;
+   vp->hdr[4] = 0xff000;
+
+   /* inputs: consecutive bits starting at bit number 0x80 / 4 */
+   for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+      for (c = 0; c < 4; ++c, ++a)
+         if (ti->input_access[i][c])
+            vp->hdr[5 + a / 32] |= 1u << (a % 32); /* VP_ATTR_EN */
+   }
+
+   /* outputs: bit number derived from the output's location */
+   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
+      a = (ti->output_loc[i][0] - 0x40) / 4;
+      for (c = 0; c < 4; ++c, ++a) {
+         if (!ti->output_access[i][c])
+            continue;
+         /* unsigned constant: 1 << 31 on a signed int is undefined */
+         vp->hdr[13 + a / 32] |= 1u << (a % 32); /* VP_EXPORT_EN */
+      }
+   }
+
+   return 0;
+}
+
+/* Fill in the fragment program header: fixed words, the kill / depth /
+ * multiple-colour-output flags, the interpolation mode of every accessed
+ * input component and the mask of written colour outputs (hdr[18]).
+ * Always returns 0.
+ */
+static int
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
+{
+   int i, c;
+   unsigned a, m;
+
+   fp->hdr[0] = 0x21462;
+   fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
+
+   if (ti->scan.uses_kill)
+      fp->hdr[0] |= 0x8000;
+   if (ti->scan.writes_z) {
+      fp->hdr[19] |= 0x2;
+      /* depth is one of the outputs, so more than 2 means several colours */
+      if (ti->scan.num_outputs > 2)
+         fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+   } else {
+      /* same flag as above; 0x8000 is the kill bit, not this one */
+      if (ti->scan.num_outputs > 1)
+         fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+   }
+
+   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+      m = ti->interp_mode[i];
+      for (c = 0; c < 4; ++c) {
+         if (!ti->input_access[i][c])
+            continue;
+         /* bit position in the header: half the component's location */
+         a = ti->input_loc[i][c] / 2;
+         if ((a & ~7) == 0x70/2)
+            fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */
+         else
+            fp->hdr[4 + a / 32] |= m << (a % 32);
+      }
+   }
+
+   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
+      if (i != ti->fp_depth_output)
+         fp->hdr[18] |= 0xf << ti->output_loc[i][0];
+   }
+
+   return 0;
+}
+
+/* First pass over the TGSI tokens of ti->prog: gather shader info, copy all
+ * instructions into ti->insns, collect immediates, declarations and
+ * subroutine arguments, then generate the program header for the shader
+ * type.
+ *
+ * Returns 0 on success and a non-zero value on failure; the caller treats
+ * the result as an error code although the declared type is boolean.
+ */
+static boolean
+nvc0_prog_scan(struct nvc0_translation_info *ti)
+{
+   struct nvc0_program *prog = ti->prog;
+   struct tgsi_parse_context parse;
+   int ret;
+   unsigned i;
+
+#ifdef NOUVEAU_DEBUG_BITS
+   tgsi_dump(prog->pipe.tokens, 0);
+#endif
+
+   tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
+
+   if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
+      /* 255 = no depth output; otherwise the index of the POSITION output */
+      ti->fp_depth_output = 255;
+      for (i = 0; i < ti->scan.num_outputs; ++i)
+         if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
+            ti->fp_depth_output = i;
+   }
+
+   /* NOTE(review): none of the allocations below is checked for failure */
+   ti->subr =
+      CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+
+   /* 16 bytes = four 32-bit words per immediate */
+   ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
+   ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
+
+   ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
+
+   tgsi_parse_init(&parse, prog->pipe.tokens);
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         prog_immediate(ti, &parse.FullToken.FullImmediate);
+         break;
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* NOTE(review): a FALSE result (unhandled file) is ignored here */
+         prog_decl(ti, &parse.FullToken.FullDeclaration);
+         break;
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         /* keep a copy; instruction ids passed to prog_inst are 1-based */
+         ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
+         prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
+         break;
+      default:
+         break;
+      }
+   }
+
+   /* NOTE(review): the scan of a subroutine body starts at index subr[i].id
+    * (the subroutine number), not at subr[i].first_insn (the index of its
+    * BGNSUB) - confirm which one is intended.
+    */
+   for (i = 0; i < ti->num_subrs; ++i) {
+      unsigned pc = ti->subr[i].id;
+      while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
+         prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
+   }
+
+   switch (prog->type) {
+   case PIPE_SHADER_VERTEX:
+      ti->input_file = NV_FILE_MEM_A;
+      ti->output_file = NV_FILE_MEM_V;
+      ret = nvc0_vp_gen_header(prog, ti);
+      break;
+      /*
+   case PIPE_SHADER_TESSELLATION_CONTROL:
+      ret = nvc0_tcp_gen_header(ti);
+      break;
+   case PIPE_SHADER_TESSELLATION_EVALUATION:
+      ret = nvc0_tep_gen_header(ti);
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      ret = nvc0_gp_gen_header(ti);
+      break;
+      */
+   case PIPE_SHADER_FRAGMENT:
+      ti->input_file = NV_FILE_MEM_V;
+      ti->output_file = NV_FILE_GPR;
+      ret = nvc0_fp_gen_header(prog, ti);
+      break;
+   default:
+      assert(!"unsupported program type");
+      ret = -1;
+      break;
+   }
+
+   assert(!ret);
+   return ret;
+}
+
+/* Translate the TGSI shader of prog: scan it, generate code and dump the
+ * resulting program header to the debug output.
+ *
+ * Returns TRUE on success, FALSE if the translation info cannot be
+ * allocated, the shader is not supported or code generation fails. All
+ * temporary translation data is released before returning.
+ */
+boolean
+nvc0_program_translate(struct nvc0_program *prog)
+{
+   struct nvc0_translation_info *ti;
+   int ret;
+
+   ti = CALLOC_STRUCT(nvc0_translation_info);
+   if (!ti)
+      return FALSE;
+   ti->prog = prog;
+
+   /* PIPE_MAX_SHADER_OUTPUTS = no edge flag output */
+   ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
+
+   ret = nvc0_prog_scan(ti);
+   if (ret) {
+      NOUVEAU_ERR("unsupported shader program\n");
+      goto out;
+   }
+
+   ret = nvc0_generate_code(ti);
+   if (ret)
+      NOUVEAU_ERR("shader translation failed\n");
+
+   {
+      unsigned i;
+      /* cast: the offset is a size_t, the format expects unsigned long */
+      for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
+         debug_printf("HDR[%02lx] = 0x%08x\n",
+                      (unsigned long)(i * sizeof(prog->hdr[0])), prog->hdr[i]);
+   }
+
+out:
+   if (ti->immd32)
+      FREE(ti->immd32);
+   if (ti->immd32_ty)
+      FREE(ti->immd32_ty);
+   if (ti->insns)
+      FREE(ti->insns);
+   if (ti->subr)
+      FREE(ti->subr);
+   FREE(ti);
+   return ret ? FALSE : TRUE;
+}
+
+/* Release everything that translating prog has produced (code memory
+ * resource, code and relocation arrays) and mark the program as not
+ * translated. The program structure itself is not freed.
+ */
+void
+nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+   if (prog->res)
+      nouveau_resource_free(&prog->res);
+
+   /* Clear the pointers after freeing: the program stays alive and may be
+    * destroyed or translated again.
+    */
+   if (prog->code) {
+      FREE(prog->code);
+      prog->code = NULL;
+   }
+   if (prog->relocs) {
+      FREE(prog->relocs);
+      prog->relocs = NULL;
+   }
+
+   prog->translated = FALSE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
new file mode 100644 (file)
index 0000000..42d9be3
--- /dev/null
@@ -0,0 +1,82 @@
+
+#ifndef __NVC0_PROGRAM_H__
+#define __NVC0_PROGRAM_H__
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+#define NVC0_CAP_MAX_PROGRAM_TEMPS 64
+
+#define NVC0_SHADER_HEADER_SIZE (20 * 4)
+
+/* A shader program together with the result of its translation. */
+struct nvc0_program {
+   struct pipe_shader_state pipe; /* pipe.tokens holds the TGSI source */
+
+   ubyte type;         /* PIPE_SHADER_* */
+   boolean translated; /* reset to FALSE by nvc0_program_destroy */
+   ubyte max_gpr;
+
+   uint32_t *code;     /* freed by nvc0_program_destroy */
+   unsigned code_base;
+   unsigned code_size;
+   unsigned parm_size;
+
+   uint32_t hdr[20];   /* program header, see NVC0_SHADER_HEADER_SIZE */
+
+   uint32_t flags[2]; /* FP_ZORDER */
+
+   struct {
+      uint8_t edgeflag; /* index of the input that is MOVed to the edge flag */
+   } vp;
+
+   void *relocs;       /* freed by nvc0_program_destroy */
+   unsigned num_relocs;
+
+   struct nouveau_resource *res; /* released by nvc0_program_destroy */
+};
+
+/* first 2 bits are written into the program header, for each input */
+#define NVC0_INTERP_FLAT          (1 << 0)
+#define NVC0_INTERP_PERSPECTIVE   (2 << 0)
+#define NVC0_INTERP_LINEAR        (3 << 0)
+#define NVC0_INTERP_CENTROID      (1 << 2)
+
+/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs
+ *
+ * argv and retv are bit sets, accessed as [register / 32][component] with
+ * bit (register % 32) representing the register.
+ * NOTE(review): with that indexing only the first few rows of the
+ * NVC0_CAP_MAX_PROGRAM_TEMPS-sized first dimension are used - confirm that
+ * the size is intended.
+ */
+struct nvc0_subroutine {
+   unsigned id;         /* subroutine number, 0 is the main program */
+   unsigned first_insn; /* 0-based index of the BGNSUB instruction */
+   uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; /* read before written */
+   uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; /* written */
+};
+
+/* Temporary state gathered while scanning a TGSI shader; it only lives for
+ * the duration of nvc0_program_translate.
+ */
+struct nvc0_translation_info {
+   struct nvc0_program *prog;
+   struct tgsi_full_instruction *insns; /* copies of all instructions */
+   unsigned num_insns;
+   ubyte input_file;  /* NV_FILE_* that shader inputs are read from */
+   ubyte output_file; /* NV_FILE_* that shader outputs are written to */
+   ubyte fp_depth_output; /* index of the POSITION output, 255 if none */
+   uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4];   /* per component */
+   uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4]; /* per component */
+   uint16_t sysval_loc[TGSI_SEMANTIC_COUNT];
+   /* id (1-based) of the last instruction accessing the component, 0 if
+    * it is never accessed
+    */
+   int input_access[PIPE_MAX_SHADER_INPUTS][4];
+   int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
+   ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; /* NVC0_INTERP_* flags */
+   boolean indirect_inputs;  /* some input is addressed indirectly */
+   boolean indirect_outputs; /* some output is addressed indirectly */
+   boolean require_stores;   /* some TEMP is addressed indirectly */
+   uint32_t *immd32;   /* four 32-bit words per immediate */
+   ubyte *immd32_ty;   /* data type of each immediate */
+   unsigned immd32_nr; /* number of immediates stored so far */
+   ubyte edgeflag_out; /* PIPE_MAX_SHADER_OUTPUTS if there is none */
+   struct nvc0_subroutine *subr; /* one entry per BGNSUB */
+   unsigned num_subrs;
+   struct tgsi_shader_info scan; /* result of tgsi_scan_shader */
+};
+
+int nvc0_generate_code(struct nvc0_translation_info *);
+
+void nvc0_relocate_program(struct nvc0_program *,
+                           uint32_t code_base, uint32_t data_base);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
new file mode 100644 (file)
index 0000000..d201f31
--- /dev/null
@@ -0,0 +1,201 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+/* Per-draw state shared by the emit_vertices_* helpers in this file. */
+struct push_context {
+   struct nouveau_channel *chan; /* channel the vertex data is pushed to */
+
+   void *idxbuf;    /* mapped index buffer, NULL for non-indexed draws */
+   int32_t idxbias;
+
+   float edgeflag;
+   int edgeflag_attr;
+
+   uint32_t vertex_size;         /* VERTEX_DATA packet length per vertex */
+   uint32_t packet_vertex_limit; /* max vertices emitted per packet */
+
+   struct translate *translate;  /* converts vertices into the ring */
+};
+
+/* Push `count` vertices selected through 8-bit indices, beginning at index
+ * position `start`, as a series of inline VERTEX_DATA packets.
+ */
+static void
+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_channel *chan = ctx->chan;
+   struct translate *tr = ctx->translate;
+   uint8_t *indices = (uint8_t *)ctx->idxbuf + start;
+   unsigned remaining;
+
+   for (remaining = count; remaining != 0; ) {
+      /* each packet carries at most packet_vertex_limit vertices */
+      unsigned batch = MIN2(remaining, ctx->packet_vertex_limit);
+      unsigned pkt_len = ctx->vertex_size * batch;
+
+      BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), pkt_len);
+
+      /* the ring's current position is the translate output pointer */
+      tr->run_elts8(tr, indices, batch, 0, chan->cur);
+      chan->cur += pkt_len;
+
+      indices += batch;
+      remaining -= batch;
+   }
+}
+
+/* Push `count` vertices selected through 16-bit indices, beginning at index
+ * position `start`, as a series of inline VERTEX_DATA packets.
+ */
+static void
+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_channel *chan = ctx->chan;
+   struct translate *tr = ctx->translate;
+   uint16_t *indices = (uint16_t *)ctx->idxbuf + start;
+   unsigned remaining;
+
+   for (remaining = count; remaining != 0; ) {
+      /* each packet carries at most packet_vertex_limit vertices */
+      unsigned batch = MIN2(remaining, ctx->packet_vertex_limit);
+      unsigned pkt_len = ctx->vertex_size * batch;
+
+      BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), pkt_len);
+
+      /* the ring's current position is the translate output pointer */
+      tr->run_elts16(tr, indices, batch, 0, chan->cur);
+      chan->cur += pkt_len;
+
+      indices += batch;
+      remaining -= batch;
+   }
+}
+
+/* Push `count` vertices selected through 32-bit indices, beginning at index
+ * position `start`, as a series of inline VERTEX_DATA packets.
+ */
+static void
+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_channel *chan = ctx->chan;
+   struct translate *tr = ctx->translate;
+   uint32_t *indices = (uint32_t *)ctx->idxbuf + start;
+   unsigned remaining;
+
+   for (remaining = count; remaining != 0; ) {
+      /* each packet carries at most packet_vertex_limit vertices */
+      unsigned batch = MIN2(remaining, ctx->packet_vertex_limit);
+      unsigned pkt_len = ctx->vertex_size * batch;
+
+      BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), pkt_len);
+
+      /* the ring's current position is the translate output pointer */
+      tr->run_elts(tr, indices, batch, 0, chan->cur);
+      chan->cur += pkt_len;
+
+      indices += batch;
+      remaining -= batch;
+   }
+}
+
+/* Push `count` consecutive vertices, beginning at vertex `start`, as a
+ * series of inline VERTEX_DATA packets (non-indexed draw).
+ */
+static void
+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_channel *chan = ctx->chan;
+   struct translate *tr = ctx->translate;
+   unsigned first = start;
+   unsigned remaining;
+
+   for (remaining = count; remaining != 0; ) {
+      /* each packet carries at most packet_vertex_limit vertices */
+      unsigned batch = MIN2(remaining, ctx->packet_vertex_limit);
+      unsigned pkt_len = ctx->vertex_size * batch;
+
+      BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), pkt_len);
+
+      /* the ring's current position is the translate output pointer */
+      tr->run(tr, first, batch, 0, chan->cur);
+      chan->cur += pkt_len;
+
+      first += batch;
+      remaining -= batch;
+   }
+}
+
+
+/* Expands to a switch case returning the VERTEX_BEGIN_GL primitive value
+ * that carries the same name as the PIPE_PRIM_<n> constant.
+ */
+#define NVC0_PRIM_GL_CASE(n) \
+   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+/* Translate a PIPE_PRIM_* primitive type into the value written to
+ * VERTEX_BEGIN_GL. Types without a case of their own fall back to POINTS.
+ */
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+   switch (prim) {
+   /* points and lines */
+   NVC0_PRIM_GL_CASE(POINTS);
+   NVC0_PRIM_GL_CASE(LINES);
+   NVC0_PRIM_GL_CASE(LINE_LOOP);
+   NVC0_PRIM_GL_CASE(LINE_STRIP);
+   /* triangles, quads and polygons */
+   NVC0_PRIM_GL_CASE(TRIANGLES);
+   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+   NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+   NVC0_PRIM_GL_CASE(QUADS);
+   NVC0_PRIM_GL_CASE(QUAD_STRIP);
+   NVC0_PRIM_GL_CASE(POLYGON);
+   /* adjacency variants */
+   NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+   /*
+   NVC0_PRIM_GL_CASE(PATCHES); */
+   default:
+      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+   }
+}
+
+/* Draw by pushing translated vertex data inline through the command stream.
+ *
+ * All vertex buffers are mapped and handed to the translate object, the
+ * index buffer (if any) is mapped, and then one VERTEX_BEGIN_GL /
+ * VERTEX_END_GL pair is emitted per instance. If any mapping fails the draw
+ * is dropped; every mapping that did succeed is released before returning.
+ */
+void
+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+   struct push_context ctx;
+   struct pipe_transfer *transfer = NULL;
+   unsigned i, index_size;
+   unsigned num_mapped = 0; /* vertex buffers [0, num_mapped) are mapped */
+   unsigned prim = nvc0_prim_gl(info->mode);
+   unsigned inst = info->instance_count;
+
+   ctx.chan = nvc0->screen->base.channel;
+   ctx.translate = nvc0->vertex->translate;
+   ctx.packet_vertex_limit = nvc0->vertex->vtx_per_packet_max;
+   ctx.vertex_size = nvc0->vertex->vtx_size;
+
+   for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+      uint8_t *data;
+      struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
+      struct nvc0_resource *res = nvc0_resource(vb->buffer);
+
+      if (nouveau_bo_map(res->bo, NOUVEAU_BO_RD))
+         goto unmap_vtxbufs;
+      num_mapped = i + 1;
+
+      data = (uint8_t *)res->bo->map + vb->buffer_offset;
+      if (info->indexed) {
+         /* index_bias may be negative: multiply in signed arithmetic so the
+          * pointer moves backwards instead of wrapping to a huge offset.
+          */
+         data += info->index_bias * (int)vb->stride;
+      }
+
+      ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+   }
+
+   if (info->indexed) {
+      ctx.idxbuf = pipe_buffer_map(&nvc0->pipe, nvc0->idxbuf.buffer,
+                                   PIPE_TRANSFER_READ, &transfer);
+      if (!ctx.idxbuf)
+         goto unmap_vtxbufs;
+      index_size = nvc0->idxbuf.index_size;
+   } else {
+      ctx.idxbuf = NULL;
+      index_size = 0;
+   }
+
+   while (inst--) {
+      BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1);
+      OUT_RING  (ctx.chan, prim);
+      switch (index_size) {
+      case 0:
+         emit_vertices_seq(&ctx, info->start, info->count);
+         break;
+      case 1:
+         emit_vertices_i08(&ctx, info->start, info->count);
+         break;
+      case 2:
+         emit_vertices_i16(&ctx, info->start, info->count);
+         break;
+      case 4:
+         emit_vertices_i32(&ctx, info->start, info->count);
+         break;
+      default:
+         assert(0);
+         break;
+      }
+      INLIN_RING(ctx.chan, RING_3D(VERTEX_END_GL), 0);
+
+      /* every instance after the first carries the INSTANCE_NEXT flag */
+      prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+   }
+
+   if (info->indexed)
+      pipe_buffer_unmap(&nvc0->pipe, nvc0->idxbuf.buffer, transfer);
+
+unmap_vtxbufs:
+   /* release only what was actually mapped above */
+   for (i = 0; i < num_mapped; ++i) {
+      struct nvc0_resource *res = nvc0_resource(nvc0->vtxbuf[i].buffer);
+
+      if (res->bo)
+         nouveau_bo_unmap(res->bo);
+   }
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_push2.c b/src/gallium/drivers/nvc0/nvc0_push2.c
new file mode 100644 (file)
index 0000000..3f9359c
--- /dev/null
@@ -0,0 +1,329 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+/* Per-draw state for the attribute-by-attribute push path in this file. */
+struct push_context {
+   struct nvc0_context *nvc0;
+
+   uint vertex_size; /* length passed to each VERTEX_DATA packet */
+
+   void *idxbuf;     /* index buffer mapping, NULL for non-indexed draws */
+   uint idxsize;     /* index size in bytes (1, 2 or 4), 0 if not indexed */
+
+   float edgeflag;
+   int edgeflag_input; /* initialised to 32 by nvc0_push_vbo2 */
+
+   /* one entry per vertex element that is pushed through the FIFO */
+   struct {
+      void *map;        /* address of the attribute for vertex 0 */
+      void (*push)(struct nouveau_channel *, void *); /* emit_b* helper */
+      uint32_t stride;  /* byte distance between consecutive vertices */
+      uint32_t divisor; /* instance divisor of the vertex element */
+      uint32_t step;
+   } attr[32];
+   int num_attrs;       /* number of attr[] entries in use */
+};
+
+/* Push one 32-bit word read from `data`. */
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+   const uint32_t *src = data;
+
+   OUT_RING(chan, *src);
+}
+
+/* Push two consecutive 32-bit words read from `data`. */
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+   const uint32_t *src = data;
+   unsigned i;
+
+   for (i = 0; i < 2; ++i) {
+      OUT_RING(chan, src[i]);
+   }
+}
+
+/* Push three consecutive 32-bit words read from `data`. */
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+   const uint32_t *src = data;
+   unsigned i;
+
+   for (i = 0; i < 3; ++i) {
+      OUT_RING(chan, src[i]);
+   }
+}
+
+/* Push four consecutive 32-bit words read from `data`. */
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+   const uint32_t *src = data;
+   unsigned i;
+
+   for (i = 0; i < 4; ++i) {
+      OUT_RING(chan, src[i]);
+   }
+}
+
+/* Push one 16-bit value read from `data`, zero-extended to a full word. */
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+   const uint16_t *src = data;
+
+   OUT_RING(chan, *src);
+}
+
+/* Push three 16-bit components read from `data` as two words: components
+ * 0 and 1 share the first word (component 1 in the upper half), component 2
+ * goes into the second word on its own.
+ */
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+   uint16_t *v = data;
+
+   /* Widen before shifting: a uint16_t promotes to signed int, and shifting
+    * a value with bit 15 set left by 16 would overflow it.
+    */
+   OUT_RING(chan, ((uint32_t)v[1] << 16) | v[0]);
+   OUT_RING(chan, v[2]);
+}
+
+/* Push one 8-bit value read from `data`, zero-extended to a full word. */
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+   const uint8_t *src = data;
+
+   OUT_RING(chan, *src);
+}
+
+/* Push three 8-bit components read from `data` packed into one word:
+ * component 0 in bits 0-7, component 1 in bits 8-15, component 2 in
+ * bits 16-23.
+ */
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+   const uint8_t *src = data;
+
+   OUT_RING(chan, src[0] | (src[1] << 8) | (src[2] << 16));
+}
+
+/* Push one double read from `data` through OUT_RINGf.
+ * NOTE(review): OUT_RINGf presumably takes a float, in which case the value
+ * is narrowed to a single 32-bit word - confirm this is intended.
+ */
+static void
+emit_b64_1(struct nouveau_channel *chan, void *data)
+{
+   const double *src = data;
+
+   OUT_RINGf(chan, *src);
+}
+
+/* Push two doubles read from `data` through OUT_RINGf.
+ * NOTE(review): OUT_RINGf presumably takes a float, in which case each value
+ * is narrowed to a single 32-bit word - confirm this is intended.
+ */
+static void
+emit_b64_2(struct nouveau_channel *chan, void *data)
+{
+   const double *src = data;
+   unsigned i;
+
+   for (i = 0; i < 2; ++i) {
+      OUT_RINGf(chan, src[i]);
+   }
+}
+
+/* Push three doubles read from `data` through OUT_RINGf.
+ * NOTE(review): OUT_RINGf presumably takes a float, in which case each value
+ * is narrowed to a single 32-bit word - confirm this is intended.
+ */
+static void
+emit_b64_3(struct nouveau_channel *chan, void *data)
+{
+   const double *src = data;
+   unsigned i;
+
+   for (i = 0; i < 3; ++i) {
+      OUT_RINGf(chan, src[i]);
+   }
+}
+
+/* Push four doubles read from `data` through OUT_RINGf.
+ * NOTE(review): OUT_RINGf presumably takes a float, in which case each value
+ * is narrowed to a single 32-bit word - confirm this is intended.
+ */
+static void
+emit_b64_4(struct nouveau_channel *chan, void *data)
+{
+   const double *src = data;
+   unsigned i;
+
+   for (i = 0; i < 4; ++i) {
+      OUT_RINGf(chan, src[i]);
+   }
+}
+
+/* Emit vertex number `n` as one VERTEX_DATA packet by running the push
+ * callback of every active attribute on that vertex's data.
+ */
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+   struct nouveau_channel *chan = ctx->nvc0->screen->base.channel;
+   int a;
+
+   if (ctx->edgeflag_input < 32) {
+      /* TODO */
+   }
+
+   BEGIN_RING_NI(chan, RING_3D(VERTEX_DATA), ctx->vertex_size);
+
+   for (a = 0; a < ctx->num_attrs; ++a) {
+      /* attribute data of vertex n lies n strides past the mapped base */
+      uint8_t *src = (uint8_t *)ctx->attr[a].map + n * ctx->attr[a].stride;
+
+      ctx->attr[a].push(chan, src);
+   }
+}
+
+/* Write `enabled` to the EDGEFLAG_ENABLE method on the context's channel. */
+static void
+emit_edgeflag(struct push_context *ctx, boolean enabled)
+{
+   INLIN_RING(ctx->nvc0->screen->base.channel,
+              RING_3D(EDGEFLAG_ENABLE), enabled);
+}
+
+/* Emit `count` vertices looked up through 8-bit indices, starting at index
+ * position `start` of the index buffer.
+ */
+static void
+emit_elt08(struct push_context *ctx, unsigned start, unsigned count)
+{
+   const uint8_t *elts = ctx->idxbuf;
+   unsigned i;
+
+   for (i = 0; i < count; ++i)
+      emit_vertex(ctx, elts[start + i]);
+}
+
+/* Emit `count` vertices looked up through 16-bit indices, starting at index
+ * position `start` of the index buffer.
+ */
+static void
+emit_elt16(struct push_context *ctx, unsigned start, unsigned count)
+{
+   const uint16_t *elts = ctx->idxbuf;
+   unsigned i;
+
+   for (i = 0; i < count; ++i)
+      emit_vertex(ctx, elts[start + i]);
+}
+
+/* Emit `count` vertices looked up through 32-bit indices, starting at index
+ * position `start` of the index buffer.
+ */
+static void
+emit_elt32(struct push_context *ctx, unsigned start, unsigned count)
+{
+   const uint32_t *elts = ctx->idxbuf;
+   unsigned i;
+
+   for (i = 0; i < count; ++i)
+      emit_vertex(ctx, elts[start + i]);
+}
+
+/* Emit `count` consecutive vertices starting with vertex `start`. */
+static void
+emit_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+   unsigned i;
+
+   for (i = 0; i < count; ++i)
+      emit_vertex(ctx, start + i);
+}
+
+/* Expands to a switch case returning the VERTEX_BEGIN_GL primitive value
+ * that carries the same name as the PIPE_PRIM_<n> constant.
+ */
+#define NVC0_PRIM_GL_CASE(n) \
+   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+/* Translate a PIPE_PRIM_* primitive type into the value written to
+ * VERTEX_BEGIN_GL. Types without a case of their own fall back to POINTS.
+ */
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+   switch (prim) {
+   /* points and lines */
+   NVC0_PRIM_GL_CASE(POINTS);
+   NVC0_PRIM_GL_CASE(LINES);
+   NVC0_PRIM_GL_CASE(LINE_LOOP);
+   NVC0_PRIM_GL_CASE(LINE_STRIP);
+   /* triangles, quads and polygons */
+   NVC0_PRIM_GL_CASE(TRIANGLES);
+   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+   NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+   NVC0_PRIM_GL_CASE(QUADS);
+   NVC0_PRIM_GL_CASE(QUAD_STRIP);
+   NVC0_PRIM_GL_CASE(POLYGON);
+   /* adjacency variants */
+   NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+   /*
+   NVC0_PRIM_GL_CASE(PATCHES); */
+   default:
+      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+   }
+}
+
+/* Draw by pushing vertex data attribute by attribute through the FIFO.
+ *
+ * For every vertex element flagged in nvc0->vbo_fifo, the source address,
+ * stride and a format-specific emit callback are recorded; then one
+ * VERTEX_BEGIN_GL / VERTEX_END_GL pair is emitted per instance. The draw is
+ * dropped if a buffer cannot be mapped or if an element's format has no
+ * matching emit callback.
+ */
+void
+nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+   struct push_context ctx;
+   unsigned i, n;
+   unsigned inst = info->instance_count;
+   unsigned prim = nvc0_prim_gl(info->mode);
+
+   ctx.nvc0 = nvc0;
+   ctx.vertex_size = nvc0->vertex->vtx_size;
+   ctx.idxbuf = NULL;
+   ctx.num_attrs = 0;
+   ctx.edgeflag = 0.5f;
+   ctx.edgeflag_input = 32;
+
+   for (i = 0; i < nvc0->vertex->num_elements; ++i) {
+      struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
+      struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
+      struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo;
+      unsigned nr_components;
+
+      /* only elements flagged in vbo_fifo are pushed from here */
+      if (!(nvc0->vbo_fifo & (1 << i)))
+         continue;
+      n = ctx.num_attrs++;
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD))
+         return;
+      ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
+
+      /* the address saved above is still read by emit_vertex() later */
+      nouveau_bo_unmap(bo);
+
+      ctx.attr[n].stride = vb->stride;
+      ctx.attr[n].divisor = ve->instance_divisor;
+
+      /* stays NULL unless one of the cases below selects an emitter */
+      ctx.attr[n].push = NULL;
+
+      nr_components = util_format_get_nr_components(ve->src_format);
+      switch (util_format_get_component_bits(ve->src_format,
+                                             UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+      case 8:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b08_1; break;
+         case 2: ctx.attr[n].push = emit_b16_1; break;
+         case 3: ctx.attr[n].push = emit_b08_3; break;
+         case 4: ctx.attr[n].push = emit_b32_1; break;
+         }
+         break;
+      case 16:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b16_1; break;
+         case 2: ctx.attr[n].push = emit_b32_1; break;
+         case 3: ctx.attr[n].push = emit_b16_3; break;
+         case 4: ctx.attr[n].push = emit_b32_2; break;
+         }
+         break;
+      case 32:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b32_1; break;
+         case 2: ctx.attr[n].push = emit_b32_2; break;
+         case 3: ctx.attr[n].push = emit_b32_3; break;
+         case 4: ctx.attr[n].push = emit_b32_4; break;
+         }
+         break;
+      default:
+         break;
+      }
+
+      if (!ctx.attr[n].push) {
+         /* Unsupported component size or count: never call through an
+          * unset function pointer, even when assert() is compiled out.
+          */
+         assert(0);
+         return;
+      }
+   }
+
+   if (info->indexed) {
+      struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
+      if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD))
+         return;
+      ctx.idxbuf = res->bo->map;
+      nouveau_bo_unmap(res->bo);
+      ctx.idxsize = nvc0->idxbuf.index_size;
+   } else {
+      ctx.idxsize = 0;
+   }
+
+   while (inst--) {
+      BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1);
+      OUT_RING  (nvc0->screen->base.channel, prim);
+      switch (ctx.idxsize) {
+      case 0:
+         emit_seq(&ctx, info->start, info->count);
+         break;
+      case 1:
+         emit_elt08(&ctx, info->start, info->count);
+         break;
+      case 2:
+         emit_elt16(&ctx, info->start, info->count);
+         break;
+      case 4:
+         emit_elt32(&ctx, info->start, info->count);
+         break;
+      }
+      INLIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0);
+
+      /* every instance after the first carries the INSTANCE_NEXT flag */
+      prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+   }
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.c b/src/gallium/drivers/nvc0/nvc0_resource.c
new file mode 100644 (file)
index 0000000..181d917
--- /dev/null
@@ -0,0 +1,71 @@
+
+#include "pipe/p_context.h"
+#include "nvc0_resource.h"
+#include "nouveau/nouveau_screen.h"
+
+static unsigned
+nvc0_resource_is_referenced(struct pipe_context *pipe,
+                            struct pipe_resource *resource,
+                            unsigned face, unsigned level)
+{
+   struct nvc0_resource *res = nvc0_resource(resource);
+   unsigned flags = 0;
+
+#ifdef NOUVEAU_USERSPACE_MM
+   flags = res->status;
+#else
+   unsigned bo_flags = nouveau_bo_pending(res->bo);
+   if (bo_flags & NOUVEAU_BO_RD)
+      flags = PIPE_REFERENCED_FOR_READ;
+   if (bo_flags & NOUVEAU_BO_WR)
+      flags |= PIPE_REFERENCED_FOR_WRITE;
+#endif
+   return flags;
+}
+
+/* Create a resource from `templ`: PIPE_BUFFER targets become buffers, every
+ * other target becomes a miptree.
+ */
+static struct pipe_resource *
+nvc0_resource_create(struct pipe_screen *screen,
+                     const struct pipe_resource *templ)
+{
+   if (templ->target == PIPE_BUFFER)
+      return nvc0_buffer_create(screen, templ);
+
+   return nvc0_miptree_create(screen, templ);
+}
+
+/* Wrap a winsys handle in a resource. Only non-buffer targets are handled
+ * (as miptrees); for PIPE_BUFFER the result is NULL.
+ */
+static struct pipe_resource *
+nvc0_resource_from_handle(struct pipe_screen * screen,
+                          const struct pipe_resource *templ,
+                          struct winsys_handle *whandle)
+{
+   return (templ->target != PIPE_BUFFER) ?
+      nvc0_miptree_from_handle(screen, templ, whandle) : NULL;
+}
+
+/* Install the resource-related entry points of a context: the generic
+ * u_*_vtbl transfer dispatchers plus the driver's own reference query.
+ */
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext)
+{
+   /* driver-specific hook */
+   pcontext->is_resource_referenced = nvc0_resource_is_referenced;
+
+   /* transfer life cycle */
+   pcontext->get_transfer = u_get_transfer_vtbl;
+   pcontext->transfer_destroy = u_transfer_destroy_vtbl;
+
+   /* mapping and data movement */
+   pcontext->transfer_map = u_transfer_map_vtbl;
+   pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+   pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+   pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
+}
+
+/* Install the resource-related entry points of a screen. */
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen)
+{
+   /* resource creation */
+   pscreen->resource_create = nvc0_resource_create;
+   pscreen->resource_from_handle = nvc0_resource_from_handle;
+   pscreen->user_buffer_create = nvc0_user_buffer_create;
+
+   /* generic vtbl dispatchers */
+   pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+   pscreen->resource_destroy = u_resource_destroy_vtbl;
+
+   /* surfaces */
+   pscreen->get_tex_surface = nvc0_miptree_surface_new;
+   pscreen->tex_surface_destroy = nvc0_miptree_surface_del;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h
new file mode 100644 (file)
index 0000000..9c6895e
--- /dev/null
@@ -0,0 +1,109 @@
+
+#ifndef __NVC0_RESOURCE_H__
+#define __NVC0_RESOURCE_H__
+
+#include "util/u_transfer.h"
+#include "util/u_double_list.h"
+#define NOUVEAU_NVC0
+#include "nouveau/nouveau_winsys.h"
+#undef NOUVEAU_NVC0
+
+#include "nvc0_fence.h"
+
+struct pipe_resource;
+struct nouveau_bo;
+
+/* Resources, if mapped into the GPU's address space, are guaranteed to
+ * have constant virtual addresses.
+ * The address of a resource will lie within the nouveau_bo referenced,
+ * and this bo should be added to the memory manager's validation list.
+ *
+ * `base` must stay the first member: nvc0_resource() casts a pipe_resource
+ * pointer directly to this type.
+ */
+struct nvc0_resource {
+   struct pipe_resource base;
+   const struct u_resource_vtbl *vtbl;
+   uint64_t address;
+
+   uint8_t *data;
+   struct nouveau_bo *bo;
+   uint32_t offset; /* presumably the offset within bo - TODO confirm */
+
+   uint8_t status;  /* returned as reference flags with NOUVEAU_USERSPACE_MM */
+   uint8_t domain;
+   struct nvc0_fence *fence;
+   struct list_head list;
+};
+
+/* Decode fields of a tile mode value `m`:
+ * NVC0_TILE_H is 8 shifted left by bits 4-7 of m,
+ * NVC0_TILE_D is 1 shifted left by the bits of m from bit 8 upwards.
+ * The argument is parenthesized so that expressions such as (a | b) expand
+ * correctly.
+ */
+#define NVC0_TILE_H(m) (8 << (((m) >> 4) & 0xf))
+#define NVC0_TILE_D(m) (1 << ((m) >> 8))
+
+/* Layout of a single mipmap level of a miptree. */
+struct nvc0_miptree_level {
+   int *image_offset;  /* presumably one offset per image - TODO confirm */
+   uint32_t pitch;
+   uint32_t tile_mode; /* presumably decoded with NVC0_TILE_H / NVC0_TILE_D */
+};
+
+/* Number of entries in nvc0_miptree::level. */
+#define NVC0_MAX_TEXTURE_LEVELS 16
+
+/* A texture resource with per-level layout information.
+ * `base` must stay the first member: nvc0_miptree() casts a pipe_resource
+ * pointer directly to this type.
+ */
+struct nvc0_miptree {
+   struct nvc0_resource base;
+   struct nvc0_miptree_level level[NVC0_MAX_TEXTURE_LEVELS];
+   int image_nr;
+   int total_size;
+};
+
+/* Downcast a pipe_resource to the miptree that embeds it as its first
+ * member. The caller must know that `pt` really is a miptree.
+ */
+static INLINE struct nvc0_miptree *
+nvc0_miptree(struct pipe_resource *pt)
+{
+   struct nvc0_miptree *mt = (struct nvc0_miptree *)pt;
+
+   return mt;
+}
+
+/* Downcast a pipe_resource to the nvc0_resource that embeds it as its first
+ * member.
+ */
+static INLINE struct nvc0_resource *
+nvc0_resource(struct pipe_resource *resource)
+{
+   struct nvc0_resource *res = (struct nvc0_resource *)resource;
+
+   return res;
+}
+
+/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ?
+ * Decided by whether the backing bo has a non-zero offset.
+ */
+static INLINE boolean
+nvc0_resource_mapped_by_gpu(struct pipe_resource *resource)
+{
+   const struct nvc0_resource *res = nvc0_resource(resource);
+
+   return res->bo->offset != 0ULL;
+}
+
+/* Install the resource/transfer entry points on a context. */
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext);
+
+/* Install the resource creation/destruction entry points on a screen. */
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen);
+
+/* Internal functions:
+ */
+
+/* Create a miptree resource from a template. */
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *tmp);
+
+/* Create a miptree resource around an existing winsys handle. */
+struct pipe_resource *
+nvc0_miptree_from_handle(struct pipe_screen *pscreen,
+                         const struct pipe_resource *template,
+                         struct winsys_handle *whandle);
+
+/* Create a buffer resource from a template. */
+struct pipe_resource *
+nvc0_buffer_create(struct pipe_screen *pscreen,
+                   const struct pipe_resource *templ);
+
+/* Create a buffer resource for `bytes` bytes of user memory at `ptr`.
+ * NOTE(review): whether the memory is copied or referenced is not visible
+ * from this declaration.
+ */
+struct pipe_resource *
+nvc0_user_buffer_create(struct pipe_screen *screen,
+                        void *ptr,
+                        unsigned bytes,
+                        unsigned usage);
+
+
+/* Create a surface for one face/level/zslice of a miptree resource. */
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
+                         unsigned face, unsigned level, unsigned zslice,
+                         unsigned flags);
+
+/* Destroy a surface created by nvc0_miptree_surface_new(). */
+void
+nvc0_miptree_surface_del(struct pipe_surface *ps);
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
new file mode 100644 (file)
index 0000000..9768de9
--- /dev/null
@@ -0,0 +1,604 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "util/u_format_s3tc.h"
+#include "pipe/p_screen.h"
+
+#include "nvc0_fence.h"
+#include "nvc0_context.h"
+#include "nvc0_screen.h"
+
+#include "nouveau/nv_object.xml.h"
+#include "nvc0_graph_macros.h"
+
+/* Tell whether `format` can be used with all of the requested `bindings`.
+ * Multisampling (sample_count > 1) is always refused, as are the DXT
+ * formats while S3TC support is disabled. The remaining bindings are
+ * checked against the usage bits of nvc0_format_table. `pscreen`, `target`
+ * and `geom_flags` are not used.
+ */
+static boolean
+nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
+                                enum pipe_format format,
+                                enum pipe_texture_target target,
+                                unsigned sample_count,
+                                unsigned bindings, unsigned geom_flags)
+{
+   /* transfers & shared are always supported */
+   const unsigned always_ok = PIPE_BIND_TRANSFER_READ |
+                              PIPE_BIND_TRANSFER_WRITE |
+                              PIPE_BIND_SHARED;
+   unsigned wanted;
+
+   if (sample_count > 1)
+      return FALSE;
+
+   if (!util_format_s3tc_enabled &&
+       (format == PIPE_FORMAT_DXT1_RGB ||
+        format == PIPE_FORMAT_DXT1_RGBA ||
+        format == PIPE_FORMAT_DXT3_RGBA ||
+        format == PIPE_FORMAT_DXT5_RGBA))
+      return FALSE;
+
+   wanted = bindings & ~always_ok;
+
+   return (nvc0_format_table[format].usage & wanted) == wanted;
+}
+
+/* Answer an integer PIPE_CAP query. Unknown caps are logged through
+ * NOUVEAU_ERR and reported as 0. `pscreen` is not used.
+ */
+static int
+nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+   switch (param) {
+   /* numeric limits */
+   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+   case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+      return 32;
+   case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+      return 64;
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return 13;
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return 10;
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      return 8;
+
+   /* supported features */
+   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+   case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+   case PIPE_CAP_TEXTURE_SWIZZLE:
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+   case PIPE_CAP_NPOT_TEXTURES:
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+   case PIPE_CAP_DEPTH_CLAMP:
+   case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+   case PIPE_CAP_POINT_SPRITE:
+   case PIPE_CAP_GLSL:
+   case PIPE_CAP_SM3:
+   case PIPE_CAP_OCCLUSION_QUERY:
+   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+   case PIPE_CAP_INDEP_BLEND_ENABLE:
+   case PIPE_CAP_INDEP_BLEND_FUNC:
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+      return 1;
+
+   /* unsupported features */
+   case PIPE_CAP_TIMER_QUERY:
+   case PIPE_CAP_STREAM_OUTPUT:
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+   case PIPE_CAP_SHADER_STENCIL_EXPORT:
+   case PIPE_CAP_PRIMITIVE_RESTART:
+      return 0;
+
+   default:
+      NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+      return 0;
+   }
+}
+
+/* Answer a PIPE_SHADER_CAP query for one shader stage. Only vertex,
+ * geometry and fragment shaders are handled; any other stage yields 0.
+ * Unknown caps are logged through NOUVEAU_ERR and reported as 0.
+ * `pscreen` is not used.
+ */
+static int
+nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+                             enum pipe_shader_cap param)
+{
+   /* PIPE_SHADER_TESSELLATION_CONTROL / _EVALUATION are not handled yet */
+   if (shader != PIPE_SHADER_VERTEX &&
+       shader != PIPE_SHADER_GEOMETRY &&
+       shader != PIPE_SHADER_FRAGMENT)
+      return 0;
+
+   switch (param) {
+   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+      return 16384;
+   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      return 4;
+   case PIPE_SHADER_CAP_MAX_INPUTS:
+      return (shader == PIPE_SHADER_VERTEX) ? 32 : (0x300 / 16);
+   case PIPE_SHADER_CAP_MAX_CONSTS:
+      return 65536 / 16;
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+      return 14;
+   case PIPE_SHADER_CAP_MAX_TEMPS:
+      return NVC0_CAP_MAX_PROGRAM_TEMPS;
+   case PIPE_SHADER_CAP_MAX_PREDS:
+      return 0;
+   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+      /* reported for every handled stage except the fragment shader */
+      return shader != PIPE_SHADER_FRAGMENT;
+   case PIPE_SHADER_CAP_MAX_ADDRS:
+   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+      return 1;
+   default:
+      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
+      return 0;
+   }
+}
+
+/* Answer a floating-point PIPE_CAP query. Unknown caps are logged through
+ * NOUVEAU_ERR and reported as 0.0f. `pscreen` is not used.
+ */
+static float
+nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+   float value = 0.0f;
+
+   switch (param) {
+   case PIPE_CAP_MAX_LINE_WIDTH:
+   case PIPE_CAP_MAX_LINE_WIDTH_AA:
+      value = 10.0f;
+      break;
+   case PIPE_CAP_MAX_POINT_WIDTH:
+   case PIPE_CAP_MAX_POINT_WIDTH_AA:
+      value = 64.0f;
+      break;
+   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+      value = 16.0f;
+      break;
+   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+      value = 4.0f;
+      break;
+   default:
+      NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+      break;
+   }
+
+   return value;
+}
+
+/* Destroy the screen: drop the references on all buffer objects the screen
+ * holds, tear down the code heap and the TIC entry table, shut down the
+ * common nouveau screen and finally free the screen structure itself.
+ */
+static void
+nvc0_screen_destroy(struct pipe_screen *pscreen)
+{
+   struct nvc0_screen *screen = nvc0_screen(pscreen);
+
+   nouveau_bo_ref(NULL, &screen->text);
+   nouveau_bo_ref(NULL, &screen->tls);
+   nouveau_bo_ref(NULL, &screen->txc);
+   nouveau_bo_ref(NULL, &screen->fence.bo);
+   nouveau_bo_ref(NULL, &screen->mp_stack_bo);
+   /* also allocated in nvc0_screen_create(), so it must be released too */
+   nouveau_bo_ref(NULL, &screen->uniforms);
+
+   nouveau_resource_destroy(&screen->text_heap);
+
+   if (screen->tic.entries)
+      FREE(screen->tic.entries);
+
+   nouveau_screen_fini(&screen->base);
+
+   FREE(screen);
+}
+
+/* Upload a graph macro.
+ *
+ * `m` is the macro's method, converted to a macro id as (m - 0x3800) / 8;
+ * `pos` is the upload position, `size` the length of `data` in bytes.
+ * Returns `pos` advanced by the number of 32-bit words uploaded.
+ */
+static int
+nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+                     unsigned size, const uint32_t *data)
+{
+   struct nouveau_channel *chan = screen->base.channel;
+   const unsigned nwords = size / 4; /* bytes -> 32-bit words */
+
+   /* bind the macro id to its start position */
+   BEGIN_RING(chan, RING_ANY(NVC0_GRAPH_MACRO_ID), 2);
+   OUT_RING  (chan, (m - 0x3800) / 8);
+   OUT_RING  (chan, pos);
+
+   /* set the upload position, then stream the macro words */
+   BEGIN_RING_1I(chan, RING_ANY(NVC0_GRAPH_MACRO_UPLOAD_POS), nwords + 1);
+   OUT_RING  (chan, pos);
+   OUT_RINGp (chan, data, nwords);
+
+   return pos + nwords;
+}
+
+/* Wait for a fence. Returns 0 when nvc0_fence_wait() reports TRUE and 1
+ * otherwise. `pscreen` and `flags` are not used.
+ */
+static int
+nvc0_screen_fence_finish(struct pipe_screen *pscreen,
+                         struct pipe_fence_handle *pfence,
+                         unsigned flags)
+{
+   struct nvc0_fence *fence = (struct nvc0_fence *)pfence;
+
+   return (nvc0_fence_wait(fence) == TRUE) ? 0 : 1;
+}
+
+/* Write a fixed sequence of initial values to 3D object methods that are
+ * addressed by raw offset (RING_3D_) rather than by name.
+ * NOTE(review): the purpose of the individual methods is not documented
+ * here; the values and their order should be treated as opaque and left
+ * unchanged unless the hardware behaviour is known.
+ */
+static void
+nvc0_magic_3d_init(struct nouveau_channel *chan)
+{
+   BEGIN_RING(chan, RING_3D_(0x10cc), 1);
+   OUT_RING  (chan, 0xff);
+   BEGIN_RING(chan, RING_3D_(0x10e0), 2);
+   OUT_RING(chan, 0xff);
+   OUT_RING(chan, 0xff);
+   BEGIN_RING(chan, RING_3D_(0x10ec), 2);
+   OUT_RING(chan, 0xff);
+   OUT_RING(chan, 0xff);
+   BEGIN_RING(chan, RING_3D_(0x074c), 1);
+   OUT_RING  (chan, 0x3f);
+
+   BEGIN_RING(chan, RING_3D_(0x10f8), 1);
+   OUT_RING  (chan, 0x0101);
+
+   BEGIN_RING(chan, RING_3D_(0x16a8), 1);
+   OUT_RING  (chan, (3 << 16) | 3);
+   BEGIN_RING(chan, RING_3D_(0x1794), 1);
+   OUT_RING  (chan, (2 << 16) | 2);
+   BEGIN_RING(chan, RING_3D_(0x0de8), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D_(0x165c), 1);
+   OUT_RING  (chan, 0);
+
+   BEGIN_RING(chan, RING_3D_(0x1528), 1); /* MP poke */
+   OUT_RING  (chan, 0);
+
+   BEGIN_RING(chan, RING_3D_(0x12ac), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D_(0x0218), 1);
+   OUT_RING  (chan, 0x10);
+   BEGIN_RING(chan, RING_3D_(0x10fc), 1);
+   OUT_RING  (chan, 0x10);
+   BEGIN_RING(chan, RING_3D_(0x1290), 1);
+   OUT_RING  (chan, 0x10);
+   BEGIN_RING(chan, RING_3D_(0x12d8), 2);
+   OUT_RING  (chan, 0x10);
+   OUT_RING  (chan, 0x10);
+   BEGIN_RING(chan, RING_3D_(0x06d4), 1);
+   OUT_RING  (chan, 8);
+   BEGIN_RING(chan, RING_3D_(0x1140), 1);
+   OUT_RING  (chan, 0x10);
+   BEGIN_RING(chan, RING_3D_(0x1610), 1);
+   OUT_RING  (chan, 0xe);
+
+   BEGIN_RING(chan, RING_3D_(0x164c), 1);
+   OUT_RING  (chan, 1 << 12);
+   BEGIN_RING(chan, RING_3D_(0x151c), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D_(0x020c), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D_(0x030c), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D_(0x0300), 1);
+   OUT_RING  (chan, 3);
+   BEGIN_RING(chan, RING_3D_(0x1280), 1); /* PGRAPH poke */
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D_(0x02d0), 1);
+   OUT_RING  (chan, 0x1f40);
+   BEGIN_RING(chan, RING_3D_(0x00fdc), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D_(0x19c0), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D_(0x075c), 1);
+   OUT_RING  (chan, 3);
+
+   BEGIN_RING(chan, RING_3D_(0x0fac), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D_(0x0f90), 1);
+   OUT_RING  (chan, 0);
+}
+
+struct pipe_screen *
+nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
+{
+   struct nvc0_screen *screen;
+   struct nouveau_channel *chan;
+   struct pipe_screen *pscreen;
+   int ret;
+   unsigned i;
+
+   screen = CALLOC_STRUCT(nvc0_screen);
+   if (!screen)
+      return NULL;
+   pscreen = &screen->base.base;
+
+   ret = nouveau_screen_init(&screen->base, dev);
+   if (ret) {
+      nvc0_screen_destroy(pscreen);
+      return NULL;
+   }
+   chan = screen->base.channel;
+
+   pscreen->winsys = ws;
+   pscreen->destroy = nvc0_screen_destroy;
+   pscreen->context_create = nvc0_create;
+   pscreen->is_format_supported = nvc0_screen_is_format_supported;
+   pscreen->get_param = nvc0_screen_get_param;
+   pscreen->get_shader_param = nvc0_screen_get_shader_param;
+   pscreen->get_paramf = nvc0_screen_get_paramf;
+   pscreen->fence_finish = nvc0_screen_fence_finish;
+
+   nvc0_screen_init_resource_functions(pscreen);
+
+   screen->base.vertex_buffer_flags = NOUVEAU_BO_GART;
+   screen->base.index_buffer_flags = 0;
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART, 0, 4096, &screen->fence.bo);
+   if (ret)
+      goto fail;
+   nouveau_bo_map(screen->fence.bo, NOUVEAU_BO_RDWR);
+   screen->fence.map = screen->fence.bo->map;
+   nouveau_bo_unmap(screen->fence.bo);
+
+   BEGIN_RING(chan, RING_MF_(0x0000), 1);
+   OUT_RING  (chan, 0x9039);
+   BEGIN_RING(chan, RING_MF(NOTIFY_ADDRESS_HIGH), 3);
+   OUT_RELOCh(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+   OUT_RELOCl(chan, screen->fence.bo, 16, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+   OUT_RING  (chan, 0);
+
+   BEGIN_RING(chan, RING_2D_(0x0000), 1);
+   OUT_RING  (chan, 0x902d);
+   BEGIN_RING(chan, RING_2D(OPERATION), 1);
+   OUT_RING  (chan, NVC0_2D_OPERATION_SRCCOPY);
+   BEGIN_RING(chan, RING_2D(CLIP_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_2D(COLOR_KEY_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_2D_(0x0884), 1);
+   OUT_RING  (chan, 0x3f);
+   BEGIN_RING(chan, RING_2D_(0x0888), 1);
+   OUT_RING  (chan, 1);
+
+   BEGIN_RING(chan, RING_3D_(0x0000), 1);
+   OUT_RING  (chan, 0x9097);
+   BEGIN_RING(chan, RING_3D(NOTIFY_ADDRESS_HIGH), 3);
+   OUT_RELOCh(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+   OUT_RELOCl(chan, screen->fence.bo, 32, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+   OUT_RING  (chan, 0);
+
+   BEGIN_RING(chan, RING_3D(COND_MODE), 1);
+   OUT_RING  (chan, NVC0_3D_COND_MODE_ALWAYS);
+
+   BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+   OUT_RING  (chan, 1);
+
+   BEGIN_RING(chan, RING_3D(MULTISAMPLE_ZETA_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(MULTISAMPLE_COLOR_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(MULTISAMPLE_MODE), 1);
+   OUT_RING  (chan, NVC0_3D_MULTISAMPLE_MODE_1X);
+   BEGIN_RING(chan, RING_3D(MULTISAMPLE_CTRL), 1);
+   OUT_RING  (chan, 0);
+
+   nvc0_magic_3d_init(chan);
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, &screen->text);
+   if (ret)
+      goto fail;
+   /* nouveau_bo_pin(dev, screen->text); */
+
+   nouveau_resource_init(&screen->text_heap, 0, 1 << 20);
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 5 << 16,
+                        &screen->uniforms);
+   if (ret)
+      goto fail;
+
+   screen->tls_size = 4 * 4 * 32 * 128 * 4;
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
+                        screen->tls_size, &screen->tls);
+   if (ret)
+      goto fail;
+
+   BEGIN_RING(chan, RING_3D(CODE_ADDRESS_HIGH), 2);
+   OUT_RELOCh(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+   OUT_RELOCl(chan, screen->text, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+   BEGIN_RING(chan, RING_3D(LOCAL_ADDRESS_HIGH), 4);
+   OUT_RELOCh(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+   OUT_RELOCl(chan, screen->tls, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+   OUT_RING  (chan, screen->tls_size >> 32);
+   OUT_RING  (chan, screen->tls_size);
+   BEGIN_RING(chan, RING_3D(LOCAL_BASE), 1);
+   OUT_RING  (chan, 0);
+
+   for (i = 0; i < 5; ++i) {
+      BEGIN_RING(chan, RING_3D(TEX_LIMITS(i)), 1);
+      OUT_RING  (chan, 0x54);
+   }
+   BEGIN_RING(chan, RING_3D(LINKED_TSC), 1);
+   OUT_RING  (chan, 0);
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20,
+                        &screen->mp_stack_bo);
+   if (ret)
+      goto fail;
+   /* nouveau_bo_pin(dev, screen->mp_stack_bo); */
+
+   BEGIN_RING(chan, RING_3D_(0x17bc), 3);
+   OUT_RELOCh(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+   OUT_RELOCl(chan, screen->mp_stack_bo, 0, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR);
+   OUT_RING  (chan, 1);
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, &screen->txc);
+   if (ret)
+      goto fail;
+   /* nouveau_bo_pin(dev, screen->txc); */
+
+   BEGIN_RING(chan, RING_3D(TIC_ADDRESS_HIGH), 3);
+   OUT_RELOCh(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+   OUT_RELOCl(chan, screen->txc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+   OUT_RING  (chan, NVC0_TIC_MAX_ENTRIES - 1);
+
+   BEGIN_RING(chan, RING_3D(TSC_ADDRESS_HIGH), 3);
+   OUT_RELOCh(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+   OUT_RELOCl(chan, screen->txc, 65536, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+   OUT_RING  (chan, NVC0_TSC_MAX_ENTRIES - 1);
+
+   BEGIN_RING(chan, RING_3D(Y_ORIGIN_BOTTOM), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(WINDOW_OFFSET_X), 2);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D_(0x1590), 1); /* deactivate ZCULL */
+   OUT_RING  (chan, 0x3f);
+
+   BEGIN_RING(chan, RING_3D(VIEWPORT_CLIP_RECTS_EN), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(CLIPID_ENABLE), 1);
+   OUT_RING  (chan, 0);
+
+   BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D(DEPTH_RANGE_NEAR(0)), 2);
+   OUT_RINGf (chan, 0.0f);
+   OUT_RINGf (chan, 1.0f);
+
+   /* We use scissors instead of exact view volume clipping,
+    * so they're always enabled.
+    */
+   BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 3);
+   OUT_RING  (chan, 1);
+   OUT_RING  (chan, 8192 << 16);
+   OUT_RING  (chan, 8192 << 16);
+
+   BEGIN_RING(chan, RING_3D_(0x0fac), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D_(0x3484), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D_(0x0dbc), 1);
+   OUT_RING  (chan, 0x00010000);
+   BEGIN_RING(chan, RING_3D_(0x0dd8), 1);
+   OUT_RING  (chan, 0xff800006);
+   BEGIN_RING(chan, RING_3D_(0x3488), 1);
+   OUT_RING  (chan, 0);
+
+#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
+
+   i = 0;
+   MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables);
+   MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
+   MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select);
+   MK_MACRO(NVC0_3D_GP_SELECT, nvc0_9097_gp_select);
+   MK_MACRO(NVC0_3D_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front);
+   MK_MACRO(NVC0_3D_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back);
+   MK_MACRO(NVC0_3D_COLOR_MASK_BROADCAST, nvc0_9097_color_mask_brdc);
+
+   BEGIN_RING(chan, RING_3D(RASTERIZE_ENABLE), 1);
+   OUT_RING  (chan, 1);
+   // BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+   // OUT_RING  (chan, 0x40);
+   BEGIN_RING(chan, RING_3D(SP_SELECT(4)), 1);
+   OUT_RING  (chan, 0x40);
+   BEGIN_RING(chan, RING_3D(GP_BUILTIN_RESULT_EN), 1);
+   OUT_RING  (chan, 0);
+   // BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+   // OUT_RING  (chan, 0x30);
+   BEGIN_RING(chan, RING_3D(SP_SELECT(3)), 1);
+   OUT_RING  (chan, 0x30);
+   BEGIN_RING(chan, RING_3D(PATCH_VERTICES), 1);
+   OUT_RING  (chan, 3);
+   BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
+   OUT_RING  (chan, 0x20);
+   BEGIN_RING(chan, RING_3D(SP_SELECT(0)), 1);
+   OUT_RING  (chan, 0x00);
+
+   BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE), 1);
+   OUT_RING  (chan, 0);
+
+   BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1);
+   OUT_RING  (chan, 0x11111111);
+   BEGIN_RING(chan, RING_3D(EDGEFLAG_ENABLE), 1);
+   OUT_RING  (chan, 1);
+
+   BEGIN_RING(chan, RING_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
+   OUT_RING  (chan, 0xab);
+   OUT_RING  (chan, 0x00000000);
+   BEGIN_RING(chan, RING_3D_(0x07e8), 2);
+   OUT_RING  (chan, 0xac);
+   OUT_RING  (chan, 0x00000000);
+   BEGIN_RING(chan, RING_3D_(0x07f0), 2);
+   OUT_RING  (chan, 0xac);
+   OUT_RING  (chan, 0x00000000);
+
+   FIRE_RING (chan);
+
+   screen->tic.entries = CALLOC(4096, sizeof(void *));
+   screen->tsc.entries = screen->tic.entries + 2048;
+
+   return pscreen;
+
+fail:
+   nvc0_screen_destroy(pscreen);
+   return NULL;
+}
+
+/* Emit one target-less relocation each for the screen's text, txc and tls
+ * buffers (in that order), all flagged VRAM | RD; presumably this keeps them
+ * on the channel's buffer list for the next submission - TODO confirm against
+ * nouveau_reloc_emit.
+ */
+void
+nvc0_screen_make_buffers_resident(struct nvc0_screen *screen)
+{
+   struct nouveau_channel *chan = screen->base.channel;
+   const unsigned domains = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+   struct nouveau_bo *resident[3];
+   unsigned n;
+
+   resident[0] = screen->text;
+   resident[1] = screen->txc;
+   resident[2] = screen->tls;
+
+   for (n = 0; n < sizeof(resident) / sizeof(resident[0]); ++n)
+      nouveau_reloc_emit(chan, NULL, 0, NULL, resident[n], 0, 0, domains, 0, 0);
+}
+
+/* Claim a slot in the TIC table for 'entry' and return its index.
+ *
+ * The search starts at screen->tic.next and skips every slot whose bit is set
+ * in screen->tic.lock. If the chosen slot still holds an older entry, that
+ * entry is evicted by resetting its id to -1.
+ *
+ * NOTE(review): the search never terminates if every slot is locked.
+ */
+int
+nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
+{
+   int i = screen->tic.next;
+
+   /* unsigned 1 keeps the shift defined for bit 31 of a lock word */
+   while (screen->tic.lock[i / 32] & (1u << (i % 32)))
+      i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+   screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+   if (screen->tic.entries[i])
+      nvc0_tic_entry(screen->tic.entries[i])->id = -1;
+
+   screen->tic.entries[i] = entry;
+   return i;
+}
+
+/* Claim a slot in the TSC table for 'entry' and return its index.
+ *
+ * The search starts at screen->tsc.next and skips every slot whose bit is set
+ * in screen->tsc.lock. If the chosen slot still holds an older entry, that
+ * entry is evicted by resetting its id to -1.
+ *
+ * NOTE(review): the search never terminates if every slot is locked.
+ */
+int
+nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
+{
+   int i = screen->tsc.next;
+
+   /* unsigned 1 keeps the shift defined for bit 31 of a lock word */
+   while (screen->tsc.lock[i / 32] & (1u << (i % 32)))
+      i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+   screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+   if (screen->tsc.entries[i])
+      nvc0_tsc_entry(screen->tsc.entries[i])->id = -1;
+
+   screen->tsc.entries[i] = entry;
+   return i;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
new file mode 100644 (file)
index 0000000..989e183
--- /dev/null
@@ -0,0 +1,110 @@
+#ifndef __NVC0_SCREEN_H__
+#define __NVC0_SCREEN_H__
+
+#define NOUVEAU_NVC0
+#include "nouveau/nouveau_screen.h"
+#undef NOUVEAU_NVC0
+#include "nvc0_winsys.h"
+#include "nvc0_stateobj.h"
+
+#define NVC0_TIC_MAX_ENTRIES 2048
+#define NVC0_TSC_MAX_ENTRIES 2048
+
+struct nvc0_context;
+struct nvc0_fence;
+
+/* Per-screen driver state: the shared buffers set up at screen creation,
+ * the TIC/TSC slot tables and the fence bookkeeping.
+ */
+struct nvc0_screen {
+   struct nouveau_screen base;
+   struct nouveau_winsys *nvws;
+
+   struct nvc0_context *cur_ctx;
+
+   struct nouveau_bo *text; /* shader code; bound as CODE_ADDRESS */
+   struct nouveau_bo *uniforms;
+   struct nouveau_bo *tls; /* bound as LOCAL_ADDRESS, tls_size bytes */
+   struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
+   struct nouveau_bo *mp_stack_bo;
+
+   uint64_t tls_size; /* size passed to nouveau_bo_new() for tls */
+
+   struct nouveau_resource *text_heap; /* allocator over the text buffer */
+
+   /* TIC slot table: entries[] holds the owner of each slot, next is where
+    * the next allocation search starts, lock is one bit per slot.
+    */
+   struct {
+      void **entries;
+      int next;
+      uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
+   } tic;
+   
+   /* TSC slot table, same layout as tic; its entries[] points into the
+    * second half of the array allocated for tic.entries.
+    */
+   struct {
+      void **entries;
+      int next;
+      uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
+   } tsc;
+
+   struct {
+      uint32_t *map; /* CPU mapping of bo */
+      struct nvc0_fence *head;
+      struct nvc0_fence *tail;
+      struct nvc0_fence *current;
+      uint32_t sequence;
+      uint32_t sequence_ack;
+      struct nouveau_bo *bo; /* 4096-byte GART buffer */
+   } fence;
+};
+
+/* Reinterpret a pipe_screen pointer as an nvc0_screen pointer (plain cast,
+ * no checking).
+ */
+static INLINE struct nvc0_screen *
+nvc0_screen(struct pipe_screen *screen)
+{
+   struct nvc0_screen *nvc0scr = (struct nvc0_screen *)screen;
+
+   return nvc0scr;
+}
+
+void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
+
+int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
+int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
+
+boolean
+nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit);
+
+/* One row of nvc0_format_table.
+ * NOTE(review): presumably rt/tic/vtx are the hardware encodings for render
+ * target, texture and vertex use, and usage is a bitmask of supported
+ * bindings - confirm against nvc0_formats.c.
+ */
+struct nvc0_format {
+   uint32_t rt;
+   uint32_t tic;
+   uint32_t vtx;
+   uint32_t usage;
+};
+
+extern const struct nvc0_format nvc0_format_table[];
+
+/* Clear the lock bit of the TIC slot held by 'tic'; no-op when the entry has
+ * no slot (id < 0). The slot stays assigned to the entry.
+ */
+static INLINE void
+nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
+{
+   /* unsigned 1 keeps the shift defined for bit 31 of a lock word */
+   if (tic->id >= 0)
+      screen->tic.lock[tic->id / 32] &= ~(1u << (tic->id % 32));
+}
+
+/* Clear the lock bit of the TSC slot held by 'tsc'; no-op when the entry has
+ * no slot (id < 0). The slot stays assigned to the entry.
+ */
+static INLINE void
+nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
+{
+   /* unsigned 1 keeps the shift defined for bit 31 of a lock word */
+   if (tsc->id >= 0)
+      screen->tsc.lock[tsc->id / 32] &= ~(1u << (tsc->id % 32));
+}
+
+/* Give up the TIC slot held by 'tic': drop the table's pointer to the entry
+ * and clear the slot's lock bit. No-op when the entry has no slot (id < 0).
+ * tic->id itself is left untouched.
+ */
+static INLINE void
+nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
+{
+   if (tic->id >= 0) {
+      screen->tic.entries[tic->id] = NULL;
+      /* unsigned 1 keeps the shift defined for bit 31 of a lock word */
+      screen->tic.lock[tic->id / 32] &= ~(1u << (tic->id % 32));
+   }
+}
+
+/* Give up the TSC slot held by 'tsc': drop the table's pointer to the entry
+ * and clear the slot's lock bit. No-op when the entry has no slot (id < 0).
+ * tsc->id itself is left untouched.
+ */
+static INLINE void
+nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
+{
+   if (tsc->id >= 0) {
+      screen->tsc.entries[tsc->id] = NULL;
+      /* unsigned 1 keeps the shift defined for bit 31 of a lock word */
+      screen->tsc.lock[tsc->id / 32] &= ~(1u << (tsc->id % 32));
+   }
+}
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
new file mode 100644 (file)
index 0000000..932848e
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "nvc0_context.h"
+
+/* Make sure 'prog' is translated and its header and code are uploaded into
+ * the screen's text buffer.
+ *
+ * Returns TRUE if the program was already translated or was translated and
+ * uploaded now; FALSE if translation or the text heap allocation failed.
+ *
+ * NOTE(review): when nouveau_resource_alloc() fails, prog->translated has
+ * already been set, so a later call returns TRUE without any code having
+ * been uploaded - confirm whether callers rely on this.
+ */
+static boolean
+nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+   int ret;
+   unsigned size;
+
+   if (prog->translated)
+      return TRUE;
+
+   prog->translated = nvc0_program_translate(prog);
+   if (!prog->translated)
+      return FALSE;
+
+   /* header + code, rounded up to a multiple of 0x100 bytes */
+   size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100);
+
+   ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog,
+                                &prog->res);
+   if (ret)
+      return FALSE;
+
+   prog->code_base = prog->res->start;
+
+   /* the header goes first, the code directly after it */
+   nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM,
+                         prog->code_base, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+   nvc0_m2mf_push_linear(nvc0, nvc0->screen->text, NOUVEAU_BO_VRAM,
+                         prog->code_base + NVC0_SHADER_HEADER_SIZE,
+                         prog->code_size, prog->code);
+
+   /* NOTE(review): undocumented method 0x021c; presumably a code cache
+    * flush after the upload - confirm.
+    */
+   BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x021c), 1);
+   OUT_RING  (nvc0->screen->base.channel, 0x1111);
+
+   return TRUE;
+}
+
+/* Validate the bound vertex program and, on success, emit its selection
+ * (SP_SELECT(1) with the code base) and GPR allocation. Emits nothing if
+ * validation fails.
+ */
+void
+nvc0_vertprog_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nvc0_program *vp = nvc0->vertprog;
+
+   if (!nvc0_program_validate(nvc0, vp))
+         return;
+
+   BEGIN_RING(chan, RING_3D(SP_SELECT(1)), 2);
+   OUT_RING  (chan, 0x11);
+   OUT_RING  (chan, vp->code_base);
+   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(1)), 1);
+   OUT_RING  (chan, vp->max_gpr);
+
+   /* NOTE(review): methods 0x163c and 0x2600 are unnamed here; purpose
+    * unknown from this file.
+    */
+   BEGIN_RING(chan, RING_3D_(0x163c), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D_(0x2600), 1);
+   OUT_RING  (chan, 1);
+}
+
+/* Validate the bound fragment program and, on success, emit its selection
+ * (SP_SELECT(5) with the code base), GPR allocation and the value of
+ * fp->flags[0]. Emits nothing if validation fails.
+ */
+void
+nvc0_fragprog_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nvc0_program *fp = nvc0->fragprog;
+
+   if (!nvc0_program_validate(nvc0, fp))
+         return;
+
+   BEGIN_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(SP_SELECT(5)), 2);
+   OUT_RING  (chan, 0x51);
+   OUT_RING  (chan, fp->code_base);
+   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(5)), 1);
+   OUT_RING  (chan, fp->max_gpr);
+
+   /* NOTE(review): methods 0x0360 and 0x196c are unnamed here; the
+    * constants are taken as-is.
+    */
+   BEGIN_RING(chan, RING_3D_(0x0360), 2);
+   OUT_RING  (chan, 0x20164010);
+   OUT_RING  (chan, 0x20);
+   BEGIN_RING(chan, RING_3D_(0x196c), 1);
+   OUT_RING  (chan, fp->flags[0]);
+}
+
+/* Emit state for the tessellation control program slot (SP index 2).
+ * With no program bound, only SP_SELECT(2) = 0x20 is written; otherwise the
+ * program is validated and its selection, code base and GPR allocation are
+ * emitted. Emits nothing if validation fails.
+ */
+void
+nvc0_tctlprog_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nvc0_program *tp = nvc0->tctlprog;
+
+   if (!tp) {
+      BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 1);
+      OUT_RING  (chan, 0x20);
+      return;
+   }
+   if (!nvc0_program_validate(nvc0, tp))
+         return;
+
+   BEGIN_RING(chan, RING_3D(SP_SELECT(2)), 2);
+   OUT_RING  (chan, 0x21);
+   OUT_RING  (chan, tp->code_base);
+   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(2)), 1);
+   OUT_RING  (chan, tp->max_gpr);   
+}
+
+/* Emit state for the tessellation evaluation program slot (SP index 3).
+ * With no program bound, only TEP_SELECT = 0x30 is written; otherwise the
+ * program is validated and TEP_SELECT = 0x31, its start id (code base) and
+ * GPR allocation are emitted. Emits nothing if validation fails.
+ */
+void
+nvc0_tevlprog_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nvc0_program *tp = nvc0->tevlprog;
+
+   if (!tp) {
+      BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+      OUT_RING  (chan, 0x30);
+      return;
+   }
+   if (!nvc0_program_validate(nvc0, tp))
+         return;
+
+   BEGIN_RING(chan, RING_3D(TEP_SELECT), 1);
+   OUT_RING  (chan, 0x31);
+   BEGIN_RING(chan, RING_3D(SP_START_ID(3)), 1);
+   OUT_RING  (chan, tp->code_base);
+   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(3)), 1);
+   OUT_RING  (chan, tp->max_gpr);   
+}
+
+/* Emit state for the geometry program slot (SP index 4).
+ * With no program bound, only GP_SELECT = 0x40 is written; otherwise the
+ * program is validated and GP_SELECT = 0x41, its start id (code base) and
+ * GPR allocation are emitted. Emits nothing if validation fails.
+ */
+void
+nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nvc0_program *gp = nvc0->gmtyprog;
+
+   if (!gp) {
+      BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+      OUT_RING  (chan, 0x40);
+      return;
+   }
+   if (!nvc0_program_validate(nvc0, gp))
+         return;
+
+   BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
+   OUT_RING  (chan, 0x41);
+   BEGIN_RING(chan, RING_3D(SP_START_ID(4)), 1);
+   OUT_RING  (chan, gp->code_base);
+   BEGIN_RING(chan, RING_3D(SP_GPR_ALLOC(4)), 1);
+   OUT_RING  (chan, gp->max_gpr);   
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
new file mode 100644 (file)
index 0000000..afba7ce
--- /dev/null
@@ -0,0 +1,849 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nvc0_stateobj.h"
+#include "nvc0_context.h"
+
+#include "nvc0_3d.xml.h"
+#include "nv50_texture.xml.h"
+
+#include "nouveau/nouveau_gldefs.h"
+
+/* Convert a PIPE_MASK_* channel mask into the value written to the colour
+ * mask methods: one nibble per channel, R in bits 0-3 up to A in bits 12-15,
+ * with the low bit of a nibble set when the channel is enabled.
+ */
+static INLINE uint32_t
+nvc0_colormask(unsigned mask)
+{
+    uint32_t hw = 0;
+
+    hw |= (mask & PIPE_MASK_R) ? 0x0001 : 0;
+    hw |= (mask & PIPE_MASK_G) ? 0x0010 : 0;
+    hw |= (mask & PIPE_MASK_B) ? 0x0100 : 0;
+    hw |= (mask & PIPE_MASK_A) ? 0x1000 : 0;
+
+    return hw;
+}
+
+/* Look up the NV50_3D_BLEND_FACTOR_* value for a blend factor, using the
+ * factor directly as table index. Indices with no dedicated row map to
+ * ZERO. An out-of-range factor trips the assert.
+ */
+static INLINE uint32_t
+nvc0_blend_fac(unsigned factor)
+{
+    static const uint16_t hw_factor[] = {
+        NV50_3D_BLEND_FACTOR_ZERO, /* 0x00 */
+        NV50_3D_BLEND_FACTOR_ONE,
+        NV50_3D_BLEND_FACTOR_SRC_COLOR,
+        NV50_3D_BLEND_FACTOR_SRC_ALPHA,
+        NV50_3D_BLEND_FACTOR_DST_ALPHA,
+        NV50_3D_BLEND_FACTOR_DST_COLOR,
+        NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE,
+        NV50_3D_BLEND_FACTOR_CONSTANT_COLOR,
+        NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA,
+        NV50_3D_BLEND_FACTOR_SRC1_COLOR,
+        NV50_3D_BLEND_FACTOR_SRC1_ALPHA,
+        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0b */
+        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0c */
+        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0d */
+        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0e */
+        NV50_3D_BLEND_FACTOR_ZERO, /* 0x0f */
+        NV50_3D_BLEND_FACTOR_ZERO, /* 0x10 */
+        NV50_3D_BLEND_FACTOR_ZERO, /* 0x11 */
+        NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
+        NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+        NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
+        NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
+        NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
+        NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
+        NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
+        NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
+    };
+    uint32_t hw;
+
+    assert(factor < (sizeof(hw_factor) / sizeof(hw_factor[0])));
+
+    hw = hw_factor[factor];
+    return hw;
+}
+
+/* Build the blend state object for 'cso': a copy of the pipe state plus a
+ * pre-assembled buffer of 3D methods covering blend enables, equations and
+ * factors, colour masks and the logic op.
+ *
+ * Returns the new state object, or NULL if it could not be allocated.
+ */
+static void *
+nvc0_blend_state_create(struct pipe_context *pipe,
+                        const struct pipe_blend_state *cso)
+{
+    struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
+    int i;
+
+    /* every SB_* macro below writes through so */
+    if (!so)
+        return NULL;
+    so->pipe = *cso;
+
+    SB_OUT_3D(so, BLEND_INDEPENDENT, cso->independent_blend_enable);
+
+    if (!cso->independent_blend_enable) {
+        /* rt[0] applies to all render targets */
+        SB_BEGIN_3D(so, BLEND_ENABLES, 1);
+        SB_DATA    (so, cso->rt[0].blend_enable ? 0xff : 0);
+
+        if (cso->rt[0].blend_enable) {
+            SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+            SB_DATA    (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
+            SB_DATA    (so, nvc0_blend_fac(cso->rt[0].rgb_src_factor));
+            SB_DATA    (so, nvc0_blend_fac(cso->rt[0].rgb_dst_factor));
+            SB_DATA    (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
+            SB_DATA    (so, nvc0_blend_fac(cso->rt[0].alpha_src_factor));
+            SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+            SB_DATA    (so, nvc0_blend_fac(cso->rt[0].alpha_dst_factor));
+        }
+
+        SB_BEGIN_3D(so, COLOR_MASK_BROADCAST, 1);
+        SB_DATA    (so, nvc0_colormask(cso->rt[0].colormask));
+    } else {
+        uint8_t en = 0; /* one enable bit per render target */
+
+        for (i = 0; i < 8; ++i) {
+            if (!cso->rt[i].blend_enable)
+                continue;
+            en |= 1 << i;
+
+            SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6);
+            SB_DATA    (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+            SB_DATA    (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor));
+            SB_DATA    (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor));
+            SB_DATA    (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+            SB_DATA    (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor));
+            SB_DATA    (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor));
+        }
+        SB_BEGIN_3D(so, BLEND_ENABLES, 1);
+        SB_DATA    (so, en);
+
+        SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+        for (i = 0; i < 8; ++i)
+            SB_DATA(so, nvc0_colormask(cso->rt[i].colormask));
+    }
+
+    if (cso->logicop_enable) {
+       SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+       SB_DATA    (so, 1);
+       SB_DATA    (so, nvgl_logicop_func(cso->logicop_func));
+    } else {
+       SB_OUT_3D  (so, LOGIC_OP_ENABLE, 0);
+    }
+
+    assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+    return so;
+}
+
+/* Make 'hwcso' the context's current blend state and flag it dirty. */
+static void
+nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_BLEND;
+    ctx->blend = hwcso;
+}
+
+/* Release a blend state object; the context is not touched. */
+static void
+nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+    FREE(hwcso);
+}
+
+/* Build the rasterizer state object for 'cso': a copy of the pipe state plus
+ * a pre-assembled buffer of 3D methods (shading, lines, points, polygon
+ * mode, culling, stipple and polygon offset).
+ *
+ * Returns the new state object, or NULL if it could not be allocated.
+ */
+static void *
+nvc0_rasterizer_state_create(struct pipe_context *pipe,
+                             const struct pipe_rasterizer_state *cso)
+{
+    struct nvc0_rasterizer_stateobj *so;
+
+    so = CALLOC_STRUCT(nvc0_rasterizer_stateobj);
+    if (!so)
+        return NULL;
+    so->pipe = *cso;
+    
+    SB_BEGIN_3D(so, SHADE_MODEL, 1);
+    SB_DATA    (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
+                                     NVC0_3D_SHADE_MODEL_SMOOTH);
+    SB_OUT_3D  (so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
+    SB_OUT_3D  (so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
+
+    SB_BEGIN_3D(so, LINE_WIDTH, 1);
+    SB_DATA    (so, fui(cso->line_width));
+    SB_OUT_3D  (so, LINE_SMOOTH_ENABLE, cso->line_smooth);
+
+    /* the stipple pattern is only emitted when stippling is enabled */
+    SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1);
+    if (cso->line_stipple_enable) {
+        SB_DATA    (so, 1);
+        SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1);
+        SB_DATA    (so, (cso->line_stipple_pattern << 8) |
+                         cso->line_stipple_factor);
+                    
+    } else {
+        SB_DATA    (so, 0);
+    }
+
+    /* a fixed point size is only emitted when it is not per-vertex */
+    SB_OUT_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex);
+    if (!cso->point_size_per_vertex) {
+       SB_BEGIN_3D(so, POINT_SIZE, 1);
+       SB_DATA    (so, fui(cso->point_size));
+    }
+    SB_OUT_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization);
+
+    SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 1);
+    SB_DATA    (so, nvgl_polygon_mode(cso->fill_front));
+    SB_BEGIN_3D(so, POLYGON_MODE_BACK, 1);
+    SB_DATA    (so, nvgl_polygon_mode(cso->fill_back));
+    SB_OUT_3D  (so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth);
+
+    /* front face winding and cull mode are only emitted with culling on */
+    if (cso->cull_face != PIPE_FACE_NONE) {
+        SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+        SB_DATA    (so, 1);
+        SB_DATA    (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW :
+                                         NVC0_3D_FRONT_FACE_CW);
+        switch (cso->cull_face) {
+        case PIPE_FACE_FRONT_AND_BACK:
+            SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK);
+            break;
+        case PIPE_FACE_FRONT:
+            SB_DATA(so, NVC0_3D_CULL_FACE_FRONT);
+            break;
+        case PIPE_FACE_BACK:
+        default:
+            SB_DATA(so, NVC0_3D_CULL_FACE_BACK);
+            break;
+        }
+    } else {
+       SB_OUT_3D(so, CULL_FACE_ENABLE, 0);
+    }
+
+    SB_OUT_3D  (so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable);
+    SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+    SB_DATA    (so, cso->offset_point);
+    SB_DATA    (so, cso->offset_line);
+    SB_DATA    (so, cso->offset_tri);
+
+    /* factor and units are skipped when no offset mode is enabled */
+    if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+        SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+        SB_DATA    (so, fui(cso->offset_scale));
+        SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+        SB_DATA    (so, fui(cso->offset_units)); /* XXX: multiply by 2 ? */
+    }
+
+    assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+    return (void *)so;
+}
+
+/* Make 'hwcso' the context's current rasterizer state and flag it dirty. */
+static void
+nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+
+   ctx->dirty |= NVC0_NEW_RASTERIZER;
+   ctx->rast = hwcso;
+}
+
+/* Release a rasterizer state object; the context is not touched. */
+static void
+nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   FREE(hwcso);
+}
+
+/* Build the depth/stencil/alpha state object for 'cso': a copy of the pipe
+ * state plus a pre-assembled buffer of 3D methods. Function, op and mask
+ * methods are only emitted for the tests that are enabled.
+ *
+ * Returns the new state object, or NULL if it could not be allocated.
+ */
+static void *
+nvc0_zsa_state_create(struct pipe_context *pipe,
+                      const struct pipe_depth_stencil_alpha_state *cso)
+{
+   struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj);
+
+   /* every SB_* macro below writes through so */
+   if (!so)
+      return NULL;
+   so->pipe = *cso;
+
+   SB_OUT_3D  (so, DEPTH_WRITE_ENABLE, cso->depth.writemask);
+   SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1);
+   if (cso->depth.enabled) {
+      SB_DATA    (so, 1);
+      SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+      SB_DATA    (so, nvgl_comparison_op(cso->depth.func));
+   } else {
+      SB_DATA    (so, 0);
+   }
+
+   /* stencil[0]: front-facing state */
+   if (cso->stencil[0].enabled) {
+      SB_BEGIN_3D(so, STENCIL_FRONT_ENABLE, 5);
+      SB_DATA    (so, 1);
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+      SB_DATA    (so, nvgl_comparison_op(cso->stencil[0].func));
+      SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
+      SB_DATA    (so, cso->stencil[0].writemask);
+      SB_DATA    (so, cso->stencil[0].valuemask);
+   } else {
+      SB_OUT_3D  (so, STENCIL_FRONT_ENABLE, 0);
+   }
+
+   /* stencil[1]: back-facing state, enabled via the two-side switch */
+   if (cso->stencil[1].enabled) {
+      SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+      SB_DATA    (so, 1);
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+      SB_DATA    (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+      SB_DATA    (so, nvgl_comparison_op(cso->stencil[1].func));
+      SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+      SB_DATA    (so, cso->stencil[1].writemask);
+      SB_DATA    (so, cso->stencil[1].valuemask);
+   } else {
+      SB_OUT_3D  (so, STENCIL_TWO_SIDE_ENABLE, 0);
+   }
+
+   SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1);
+   if (cso->alpha.enabled) {
+      SB_DATA    (so, 1);
+      SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+      SB_DATA    (so, fui(cso->alpha.ref_value));
+      SB_DATA    (so, nvgl_comparison_op(cso->alpha.func));
+   } else {
+      SB_DATA    (so, 0);
+   }
+
+   assert(so->size < (sizeof(so->state) / sizeof(so->state[0])));
+   return (void *)so;
+}
+
+/* Make 'hwcso' the context's current depth/stencil/alpha state and flag it
+ * dirty.
+ */
+static void
+nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+
+   ctx->dirty |= NVC0_NEW_ZSA;
+   ctx->zsa = hwcso;
+}
+
+/* Release a depth/stencil/alpha state object; the context is not touched. */
+static void
+nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND TEXTURES ================================
+ */
+
+/* Expands to a case label mapping PIPE_TEX_WRAP_<n> to NV50_TSC_WRAP_<n>. */
+#define NV50_TSC_WRAP_CASE(n) \
+    case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+/* Translate a PIPE_TEX_WRAP_* mode into the NV50_TSC_WRAP_* value of the
+ * same name. An unknown mode is reported and treated as REPEAT.
+ */
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+   switch (wrap) {
+   NV50_TSC_WRAP_CASE(REPEAT);
+   NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+   NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+   NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+   NV50_TSC_WRAP_CASE(CLAMP);
+   NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+   NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+   NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+   default:
+       /* wrap is unsigned, so print it with %u */
+       NOUVEAU_ERR("unknown wrap mode: %u\n", wrap);
+       return NV50_TSC_WRAP_REPEAT;
+   }
+}
+
+/* Build a TSC (sampler) entry for 'cso': wrap modes, filters, anisotropy,
+ * shadow comparison, LOD bias/range and border colour are packed into
+ * so->tsc[]. The entry starts without a TSC slot (id = -1).
+ *
+ * Returns the new entry, or NULL if it could not be allocated.
+ */
+static void *
+nvc0_sampler_state_create(struct pipe_context *pipe,
+                          const struct pipe_sampler_state *cso)
+{
+   struct nvc0_tsc_entry *so = CALLOC_STRUCT(nvc0_tsc_entry);
+   float f[2];
+
+   if (!so)
+      return NULL;
+
+   so->id = -1;
+
+   so->tsc[0] = (0x00026000 |
+                 (nv50_tsc_wrap_mode(cso->wrap_s) << 0) |
+                 (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
+                 (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
+
+   switch (cso->mag_img_filter) {
+   case PIPE_TEX_FILTER_LINEAR:
+      so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR;
+      break;
+   case PIPE_TEX_FILTER_NEAREST:
+   default:
+      so->tsc[1] |= NV50_TSC_1_MAGF_NEAREST;
+      break;
+   }
+
+   switch (cso->min_img_filter) {
+   case PIPE_TEX_FILTER_LINEAR:
+      so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
+      break;
+   case PIPE_TEX_FILTER_NEAREST:
+   default:
+      so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
+      break;
+   }
+
+   switch (cso->min_mip_filter) {
+   case PIPE_TEX_MIPFILTER_LINEAR:
+      so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
+      break;
+   case PIPE_TEX_MIPFILTER_NEAREST:
+      so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+      break;
+   case PIPE_TEX_MIPFILTER_NONE:
+   default:
+      so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
+      break;
+   }
+
+   /* anisotropy level lives in tsc[0] bits 20 and up */
+   if (cso->max_anisotropy >= 16)
+      so->tsc[0] |= (7 << 20);
+   else
+   if (cso->max_anisotropy >= 12)
+      so->tsc[0] |= (6 << 20);
+   else {
+      so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
+
+      if (cso->max_anisotropy >= 4)
+         so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
+      else
+      if (cso->max_anisotropy >= 2)
+         so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
+   }
+
+   if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+      /* NOTE: must be deactivated for non-shadow textures */
+      so->tsc[0] |= (1 << 9);
+      so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
+   }
+
+   /* LOD values are stored as fixed point with 8 fractional bits */
+   f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f);
+   so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12;
+
+   f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f);
+   f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f);
+   so->tsc[2] |=
+      (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff);
+
+   so->tsc[4] = fui(cso->border_color[0]);
+   so->tsc[5] = fui(cso->border_color[1]);
+   so->tsc[6] = fui(cso->border_color[2]);
+   so->tsc[7] = fui(cso->border_color[3]);
+
+   return (void *)so;
+}
+
+/* Destroy a sampler state: clear every binding of it in the five shader
+ * stages (within each stage's current sampler count), give its TSC slot
+ * back to the screen and free it.
+ */
+static void
+nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+   unsigned stage, slot;
+
+   for (stage = 0; stage < 5; ++stage) {
+      for (slot = 0; slot < ctx->num_samplers[stage]; ++slot) {
+         if (ctx->samplers[stage][slot] != hwcso)
+            continue;
+         ctx->samplers[stage][slot] = NULL;
+      }
+   }
+
+   nvc0_screen_tsc_free(ctx->screen, nvc0_tsc_entry(hwcso));
+
+   FREE(hwcso);
+}
+
+/* Bind 'nr' sampler states to shader stage 's'.
+ *
+ * Each replaced sampler has its TSC slot unlocked. Samplers in slots at or
+ * beyond 'nr' are unlocked and unbound, so no pointer to them stays behind
+ * once num_samplers[s] has shrunk; a stale pointer there would not be
+ * cleared when that sampler is deleted and would be dereferenced by a
+ * later, larger bind.
+ */
+static INLINE void
+nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
+                               unsigned nr, void **hwcso)
+{
+   unsigned i;
+
+   for (i = 0; i < nr; ++i) {
+      struct nvc0_tsc_entry *old = nvc0->samplers[s][i];
+
+      nvc0->samplers[s][i] = nvc0_tsc_entry(hwcso[i]);
+      if (old)
+         nvc0_screen_tsc_unlock(nvc0->screen, old);
+   }
+   for (; i < nvc0->num_samplers[s]; ++i) {
+      if (nvc0->samplers[s][i]) {
+         nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+         nvc0->samplers[s][i] = NULL;
+      }
+   }
+
+   nvc0->num_samplers[s] = nr;
+
+   nvc0->dirty |= NVC0_NEW_SAMPLERS;
+}
+
+/* Bind sampler states for the vertex program (stage index 0). */
+static void
+nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+   nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
+}
+
+/* Bind sampler states for the fragment program (stage index 4). */
+static void
+nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+   nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s);
+}
+
+/* Bind sampler states for the geometry program (stage index 3). */
+static void
+nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+   nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
+}
+
+/* Destroy a sampler view: drop its texture reference, give its TIC slot
+ * back to the screen and free the entry.
+ * NOTE: only called when not referenced anywhere, won't be bound
+ */
+static void
+nvc0_sampler_view_destroy(struct pipe_context *pipe,
+                          struct pipe_sampler_view *view)
+{
+   struct nvc0_screen *screen = nvc0_context(pipe)->screen;
+   struct nvc0_tic_entry *tic = nvc0_tic_entry(view);
+
+   pipe_resource_reference(&view->texture, NULL);
+
+   nvc0_screen_tic_free(screen, tic);
+
+   FREE(tic);
+}
+
+/* Bind 'nr' sampler views to shader stage 's'.
+ *
+ * The TIC slot of every previously bound view is unlocked before the new
+ * view takes its place; views in slots at or beyond 'nr' are unlocked and
+ * dereferenced. Marks the texture state dirty.
+ */
+static INLINE void
+nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
+                             unsigned nr,
+                             struct pipe_sampler_view **views)
+{
+   struct nvc0_tic_entry *prev;
+   unsigned slot;
+
+   /* slots receiving a new view */
+   for (slot = 0; slot < nr; ++slot) {
+      prev = nvc0_tic_entry(nvc0->textures[s][slot]);
+      if (prev)
+         nvc0_screen_tic_unlock(nvc0->screen, prev);
+
+      pipe_sampler_view_reference(&nvc0->textures[s][slot], views[slot]);
+   }
+
+   /* slots that fall out of use */
+   for (slot = nr; slot < nvc0->num_textures[s]; ++slot) {
+      prev = nvc0_tic_entry(nvc0->textures[s][slot]);
+      if (prev) {
+         nvc0_screen_tic_unlock(nvc0->screen, prev);
+
+         pipe_sampler_view_reference(&nvc0->textures[s][slot], NULL);
+      }
+   }
+
+   nvc0->num_textures[s] = nr;
+
+   nvc0->dirty |= NVC0_NEW_TEXTURES;
+}
+
+/* set_vertex_sampler_views hook: forwards to stage slot 0. */
+static void
+nvc0_vp_set_sampler_views(struct pipe_context *pipe,
+                          unsigned nr,
+                          struct pipe_sampler_view **views)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+
+   nvc0_stage_set_sampler_views(ctx, 0, nr, views);
+}
+
+/* set_fragment_sampler_views hook: forwards to stage slot 4. */
+static void
+nvc0_fp_set_sampler_views(struct pipe_context *pipe,
+                          unsigned nr,
+                          struct pipe_sampler_view **views)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+
+   nvc0_stage_set_sampler_views(ctx, 4, nr, views);
+}
+
+/* set_geometry_sampler_views hook: forwards to stage slot 3. */
+static void
+nvc0_gp_set_sampler_views(struct pipe_context *pipe,
+                          unsigned nr,
+                          struct pipe_sampler_view **views)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+
+   nvc0_stage_set_sampler_views(ctx, 3, nr, views);
+}
+
+/* ============================= SHADERS =======================================
+ */
+
+/* Allocate a shader program object of the given PIPE_SHADER_* type.
+ *
+ * The TGSI token stream of 'cso' is duplicated, so the caller keeps
+ * ownership of its own copy. Returns the new nvc0_program as an opaque
+ * pointer, or NULL if either allocation fails.
+ */
+static void *
+nvc0_sp_state_create(struct pipe_context *pipe,
+                     const struct pipe_shader_state *cso, unsigned type)
+{
+   struct nvc0_program *prog;
+
+   prog = CALLOC_STRUCT(nvc0_program);
+   if (!prog)
+      return NULL;
+
+   prog->type = type;
+   prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+   if (!prog->pipe.tokens) {
+      /* do not hand out a program that has no token stream */
+      FREE(prog);
+      return NULL;
+   }
+
+   return (void *)prog;
+}
+
+/* Shared delete hook for vertex, fragment and geometry programs: tears down
+ * the program via nvc0_program_destroy(), then frees the duplicated token
+ * stream and the program object.
+ */
+static void
+nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+   struct nvc0_context *ctx = nvc0_context(pipe);
+   struct nvc0_program *program = (struct nvc0_program *)hwcso;
+
+   nvc0_program_destroy(ctx, program);
+
+   FREE((void *)program->pipe.tokens);
+   FREE(program);
+}
+
+/* create_vs_state hook: creates a PIPE_SHADER_VERTEX program object. */
+static void *
+nvc0_vp_state_create(struct pipe_context *pipe,
+                     const struct pipe_shader_state *cso)
+{
+   void *vp = nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
+
+   return vp;
+}
+
+/* bind_vs_state hook: records the vertex program and flags it dirty. */
+static void
+nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_VERTPROG;
+    ctx->vertprog = hwcso;
+}
+
+/* create_fs_state hook: creates a PIPE_SHADER_FRAGMENT program object. */
+static void *
+nvc0_fp_state_create(struct pipe_context *pipe,
+                     const struct pipe_shader_state *cso)
+{
+   void *fp = nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
+
+   return fp;
+}
+
+/* bind_fs_state hook: records the fragment program and flags it dirty. */
+static void
+nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_FRAGPROG;
+    ctx->fragprog = hwcso;
+}
+
+/* create_gs_state hook: creates a PIPE_SHADER_GEOMETRY program object. */
+static void *
+nvc0_gp_state_create(struct pipe_context *pipe,
+                     const struct pipe_shader_state *cso)
+{
+   void *gp = nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
+
+   return gp;
+}
+
+/* bind_gs_state hook: records the geometry program and flags it dirty. */
+static void
+nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_GMTYPROG;
+    ctx->gmtyprog = hwcso;
+}
+
+/* set_constant_buffer hook: bind 'res' (may be NULL) as constant buffer
+ * 'index' of the given PIPE_SHADER_* stage.
+ *
+ * The gallium shader enum is translated to the driver's stage slot
+ * (vertex = 0, geometry = 3, fragment = 4; slots 1 and 2 are reserved for
+ * the tessellation stages, which are still commented out). A previously
+ * bound buffer is removed from the NVC0_BUFCTX_CONSTANT residency list
+ * before the reference is replaced, and the slot is flagged in
+ * constbuf_dirty[] so it gets re-emitted on the next validation.
+ *
+ * An unsupported shader type asserts in debug builds and is ignored
+ * otherwise, instead of indexing constbuf[] with an untranslated value.
+ */
+static void
+nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+                         struct pipe_resource *res)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+   switch (shader) {
+   case PIPE_SHADER_VERTEX: shader = 0; break;
+      /*
+   case PIPE_SHADER_TESSELLATION_CONTROL: shader = 1; break;
+   case PIPE_SHADER_TESSELLATION_EVALUATION: shader = 2; break;
+      */
+   case PIPE_SHADER_GEOMETRY: shader = 3; break;
+   case PIPE_SHADER_FRAGMENT: shader = 4; break;
+   default:
+      assert(0);
+      return;
+   }
+
+   if (nvc0->constbuf[shader][index])
+      nvc0_bufctx_del_resident(nvc0, NVC0_BUFCTX_CONSTANT,
+                              nvc0_resource(
+                                      nvc0->constbuf[shader][index]));
+
+   pipe_resource_reference(&nvc0->constbuf[shader][index], res);
+
+   nvc0->constbuf_dirty[shader] |= 1 << index;
+
+   nvc0->dirty |= NVC0_NEW_CONSTBUF;
+}
+
+/* =============================================================================
+ */
+
+/* set_blend_color hook: stores a copy of the blend colour, flags it dirty. */
+static void
+nvc0_set_blend_color(struct pipe_context *pipe,
+                     const struct pipe_blend_color *bcol)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_BLEND_COLOUR;
+    ctx->blend_colour = *bcol;
+}
+
+/* set_stencil_ref hook: stores a copy of the reference values, flags dirty. */
+static void
+nvc0_set_stencil_ref(struct pipe_context *pipe,
+                     const struct pipe_stencil_ref *sr)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_STENCIL_REF;
+    ctx->stencil_ref = *sr;
+}
+
+/* set_clip_state hook: only the depth_clamp field of 'clip' is kept. */
+static void
+nvc0_set_clip_state(struct pipe_context *pipe,
+                    const struct pipe_clip_state *clip)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_CLIP;
+    ctx->clip.depth_clamp = clip->depth_clamp;
+}
+
+/* set_sample_mask hook: records the mask and flags it dirty. */
+static void
+nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_SAMPLE_MASK;
+    ctx->sample_mask = sample_mask;
+}
+
+
+/* set_framebuffer_state hook: stores a plain struct copy of 'fb' and flags
+ * the framebuffer dirty.
+ * NOTE(review): the surface pointers are copied without taking references
+ * here -- confirm the state tracker keeps them alive while bound.
+ */
+static void
+nvc0_set_framebuffer_state(struct pipe_context *pipe,
+                           const struct pipe_framebuffer_state *fb)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_FRAMEBUFFER;
+    ctx->framebuffer = *fb;
+}
+
+/* set_polygon_stipple hook: stores a copy of the pattern, flags it dirty. */
+static void
+nvc0_set_polygon_stipple(struct pipe_context *pipe,
+                         const struct pipe_poly_stipple *stipple)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_STIPPLE;
+    ctx->stipple = *stipple;
+}
+
+/* set_scissor_state hook: stores a copy of the rectangle, flags it dirty. */
+static void
+nvc0_set_scissor_state(struct pipe_context *pipe,
+                       const struct pipe_scissor_state *scissor)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_SCISSOR;
+    ctx->scissor = *scissor;
+}
+
+/* set_viewport_state hook: stores a copy of the transform, flags it dirty. */
+static void
+nvc0_set_viewport_state(struct pipe_context *pipe,
+                        const struct pipe_viewport_state *vpt)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_VIEWPORT;
+    ctx->viewport = *vpt;
+}
+
+/* set_vertex_buffers hook: copies 'count' vertex buffer descriptors into
+ * the context and flags the vertex arrays dirty.
+ * NOTE(review): 'count' is not checked against the capacity of vtxbuf[]
+ * here -- presumably the caller respects the driver limit; confirm.
+ */
+static void
+nvc0_set_vertex_buffers(struct pipe_context *pipe,
+                        unsigned count,
+                        const struct pipe_vertex_buffer *vb)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    memcpy(ctx->vtxbuf, vb, count * sizeof(vb[0]));
+    ctx->num_vtxbufs = count;
+    ctx->dirty |= NVC0_NEW_ARRAYS;
+}
+
+/* set_index_buffer hook: copies the descriptor, or clears the buffer
+ * pointer when 'ib' is NULL. No dirty flag is raised here.
+ */
+static void
+nvc0_set_index_buffer(struct pipe_context *pipe,
+                      const struct pipe_index_buffer *ib)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    if (!ib) {
+        ctx->idxbuf.buffer = NULL;
+        return;
+    }
+    memcpy(&ctx->idxbuf, ib, sizeof(ctx->idxbuf));
+}
+
+/* bind_vertex_elements_state hook: records the CSO and flags it dirty. */
+static void
+nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nvc0_context *ctx = nvc0_context(pipe);
+
+    ctx->dirty |= NVC0_NEW_VERTEX;
+    ctx->vertex = hwcso;
+}
+
+/* Install this file's state create/bind/delete/set callbacks into the
+ * pipe_context embedded in 'nvc0'.
+ */
+void
+nvc0_init_state_functions(struct nvc0_context *nvc0)
+{
+    struct pipe_context *pipe = &nvc0->pipe;
+
+    /* blend */
+    pipe->create_blend_state = nvc0_blend_state_create;
+    pipe->bind_blend_state = nvc0_blend_state_bind;
+    pipe->delete_blend_state = nvc0_blend_state_delete;
+
+    /* rasterizer */
+    pipe->create_rasterizer_state = nvc0_rasterizer_state_create;
+    pipe->bind_rasterizer_state = nvc0_rasterizer_state_bind;
+    pipe->delete_rasterizer_state = nvc0_rasterizer_state_delete;
+
+    /* depth / stencil / alpha */
+    pipe->create_depth_stencil_alpha_state = nvc0_zsa_state_create;
+    pipe->bind_depth_stencil_alpha_state = nvc0_zsa_state_bind;
+    pipe->delete_depth_stencil_alpha_state = nvc0_zsa_state_delete;
+
+    /* sampler states */
+    pipe->create_sampler_state = nvc0_sampler_state_create;
+    pipe->delete_sampler_state = nvc0_sampler_state_delete;
+    pipe->bind_vertex_sampler_states   = nvc0_vp_sampler_states_bind;
+    pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
+    pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
+
+    /* sampler views */
+    pipe->create_sampler_view = nvc0_create_sampler_view;
+    pipe->sampler_view_destroy = nvc0_sampler_view_destroy;
+    pipe->set_vertex_sampler_views   = nvc0_vp_set_sampler_views;
+    pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views;
+    pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views;
+
+    /* shader programs; one delete hook serves all three stages */
+    pipe->create_vs_state = nvc0_vp_state_create;
+    pipe->create_fs_state = nvc0_fp_state_create;
+    pipe->create_gs_state = nvc0_gp_state_create;
+    pipe->bind_vs_state = nvc0_vp_state_bind;
+    pipe->bind_fs_state = nvc0_fp_state_bind;
+    pipe->bind_gs_state = nvc0_gp_state_bind;
+    pipe->delete_vs_state = nvc0_sp_state_delete;
+    pipe->delete_fs_state = nvc0_sp_state_delete;
+    pipe->delete_gs_state = nvc0_sp_state_delete;
+
+    /* parameter-style state */
+    pipe->set_blend_color = nvc0_set_blend_color;
+    pipe->set_stencil_ref = nvc0_set_stencil_ref;
+    pipe->set_clip_state = nvc0_set_clip_state;
+    pipe->set_sample_mask = nvc0_set_sample_mask;
+    pipe->set_constant_buffer = nvc0_set_constant_buffer;
+    pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
+    pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
+    pipe->set_scissor_state = nvc0_set_scissor_state;
+    pipe->set_viewport_state = nvc0_set_viewport_state;
+
+    /* vertex input */
+    pipe->create_vertex_elements_state = nvc0_vertex_state_create;
+    pipe->delete_vertex_elements_state = nvc0_vertex_state_delete;
+    pipe->bind_vertex_elements_state = nvc0_vertex_state_bind;
+
+    pipe->set_vertex_buffers = nvc0_set_vertex_buffers;
+    pipe->set_index_buffer = nvc0_set_index_buffer;
+}
+
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
new file mode 100644 (file)
index 0000000..a024831
--- /dev/null
@@ -0,0 +1,328 @@
+
+#include "nvc0_context.h"
+#include "os/os_time.h"
+
+/* Emit ZCULL setup for the currently bound depth/stencil surface.
+ *
+ * fb->zsbuf is dereferenced unconditionally, so this must only be called
+ * while a zeta buffer is bound. The function is not referenced by
+ * validate_list[] in this file. Most methods are written as raw offsets;
+ * the trailing comments are the author's notes on them.
+ */
+static void
+nvc0_validate_zcull(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+    struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture);
+    struct nouveau_bo *bo = mt->base.bo;
+    uint32_t size;
+    /* both addresses are offsets into the zeta buffer's own bo, starting at
+     * mt->total_size aligned to 1 << 17 (128 KiB) */
+    uint32_t offset = align(mt->total_size, 1 << 17);
+    unsigned width, height;
+
+    size = mt->total_size * 2;
+
+    height = align(fb->height, 32);
+    /* round the width up to the next multiple of 224 */
+    width = fb->width % 224;
+    if (width)
+       width = fb->width + (224 - width);
+    else
+       width = fb->width;
+
+    BEGIN_RING(chan, RING_3D_(0x1590), 1); /* ZCULL_REGION_INDEX (bits 0x3f) */
+    OUT_RING  (chan, 0);
+    BEGIN_RING(chan, RING_3D_(0x07e8), 2); /* ZCULL_ADDRESS_A_HIGH */
+    OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    /* second address lies 128 KiB after the first */
+    offset += 1 << 17;
+    BEGIN_RING(chan, RING_3D_(0x07f0), 2); /* ZCULL_ADDRESS_B_HIGH */
+    OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    BEGIN_RING(chan, RING_3D_(0x07e0), 2);
+    OUT_RING  (chan, size);
+    OUT_RING  (chan, size >> 16);
+    BEGIN_RING(chan, RING_3D_(0x15c8), 1); /* bits 0x3 */
+    OUT_RING  (chan, 2);
+    BEGIN_RING(chan, RING_3D_(0x07c0), 4); /* ZCULL dimensions */
+    OUT_RING  (chan, width);
+    OUT_RING  (chan, height);
+    OUT_RING  (chan, 1);
+    OUT_RING  (chan, 0);
+    BEGIN_RING(chan, RING_3D_(0x15fc), 2);
+    OUT_RING  (chan, 0); /* bits 0xffff */
+    OUT_RING  (chan, 0); /* bits 0xffff */
+    BEGIN_RING(chan, RING_3D_(0x1958), 1);
+    OUT_RING  (chan, 0); /* bits ~0 */
+}
+
+/* Emit the framebuffer state: render target control, screen scissor, one
+ * 8-word descriptor per colour buffer, the zeta (depth/stencil) buffer or
+ * its disable, and the viewport extent.
+ *
+ * The NVC0_BUFCTX_FRAME residency list is reset first and every bound
+ * colour/zeta miptree is re-added to it.
+ */
+static void
+nvc0_validate_fb(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+    unsigned i;
+
+    nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_FRAME);
+
+    BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+    /* 076543210 is an octal literal: eight 3-bit fields holding 0..7,
+     * lowest field first; the low 4 bits carry the colour buffer count */
+    OUT_RING  (chan, (076543210 << 4) | fb->nr_cbufs);
+    BEGIN_RING(chan, RING_3D(SCREEN_SCISSOR_HORIZ), 2);
+    /* size in the upper 16 bits, lower 16 bits left at 0 */
+    OUT_RING  (chan, fb->width << 16);
+    OUT_RING  (chan, fb->height << 16);
+
+    for (i = 0; i < fb->nr_cbufs; ++i) {
+        struct nvc0_miptree *mt = nvc0_miptree(fb->cbufs[i]->texture);
+        struct nouveau_bo *bo = mt->base.bo;
+        unsigned offset = fb->cbufs[i]->offset;
+        
+        /* address (high, low), width, height, format, tile mode, 1, 0 */
+        BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(i)), 8);
+        OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+        OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+        OUT_RING  (chan, fb->cbufs[i]->width);
+        OUT_RING  (chan, fb->cbufs[i]->height);
+        OUT_RING  (chan, nvc0_format_table[fb->cbufs[i]->format].rt);
+        OUT_RING  (chan, mt->level[fb->cbufs[i]->level].tile_mode);
+        OUT_RING  (chan, 1);
+        OUT_RING  (chan, 0);
+
+        nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
+                                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    }
+
+    if (fb->zsbuf) {
+        struct nvc0_miptree *mt = nvc0_miptree(fb->zsbuf->texture);
+        struct nouveau_bo *bo = mt->base.bo;
+        unsigned offset = fb->zsbuf->offset;
+        
+        /* address (high, low), format, tile mode, 0 */
+        BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+        OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+        OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+        OUT_RING  (chan, nvc0_format_table[fb->zsbuf->format].rt);
+        OUT_RING  (chan, mt->level[fb->zsbuf->level].tile_mode);
+        OUT_RING  (chan, 0);
+        BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+        OUT_RING  (chan, 1);
+        BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+        OUT_RING  (chan, fb->zsbuf->width);
+        OUT_RING  (chan, fb->zsbuf->height);
+        OUT_RING  (chan, (1 << 16) | 1);
+
+        nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_FRAME, &mt->base,
+                                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+    } else {
+        /* no depth/stencil surface bound */
+        BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+        OUT_RING  (chan, 0);
+    }
+
+    /* viewport 0 is sized to the framebuffer, same encoding as the screen
+     * scissor above */
+    BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+    OUT_RING  (chan, fb->width << 16);
+    OUT_RING  (chan, fb->height << 16);
+}
+
+/* Emit the four blend colour components starting at BLEND_COLOR(0). */
+static void
+nvc0_validate_blend_colour(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    unsigned c;
+
+    BEGIN_RING(chan, RING_3D(BLEND_COLOR(0)), 4);
+    for (c = 0; c < 4; ++c)
+        OUT_RINGf(chan, nvc0->blend_colour.color[c]);
+}
+
+/* Emit the stencil reference values: ref_value[0] for the front face
+ * method, ref_value[1] for the back face method.
+ */
+static void
+nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    const struct pipe_stencil_ref *ref = &nvc0->stencil_ref;
+
+    BEGIN_RING(chan, RING_3D(STENCIL_FRONT_FUNC_REF), 1);
+    OUT_RING  (chan, ref->ref_value[0]);
+
+    BEGIN_RING(chan, RING_3D(STENCIL_BACK_FUNC_REF), 1);
+    OUT_RING  (chan, ref->ref_value[1]);
+}
+
+/* Emit the 32-row polygon stipple pattern; every row is byte-swapped with
+ * util_bswap32() before it is written.
+ */
+static void
+nvc0_validate_stipple(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    const struct pipe_poly_stipple *pattern = &nvc0->stipple;
+    unsigned row = 0;
+
+    BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+    while (row < 32) {
+        OUT_RING(chan, util_bswap32(pattern->stipple[row]));
+        ++row;
+    }
+}
+
+/* Emit the scissor rectangle for viewport 0.
+ *
+ * With scissoring enabled in the rasterizer state, the user rectangle is
+ * programmed; otherwise a rectangle spanning the whole framebuffer is used.
+ *
+ * This runs for scissor, framebuffer and rasterizer changes (see
+ * validate_list[]). The emission is skipped only when nothing that feeds
+ * the rectangle changed. In the disabled case the rectangle is derived from
+ * the framebuffer size, so a framebuffer change must count as relevant;
+ * otherwise a resize would leave the old extent programmed.
+ */
+static void
+nvc0_validate_scissor(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    struct pipe_scissor_state *s = &nvc0->scissor;
+    uint32_t relevant = NVC0_NEW_SCISSOR;
+
+    if (!nvc0->rast->pipe.scissor)
+       relevant |= NVC0_NEW_FRAMEBUFFER;
+
+    if (!(nvc0->dirty & relevant) &&
+       nvc0->state.scissor == nvc0->rast->pipe.scissor)
+       return;
+    nvc0->state.scissor = nvc0->rast->pipe.scissor;
+
+    if (nvc0->state.scissor) {
+       BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+       OUT_RING  (chan, (s->maxx << 16) | s->minx);
+       OUT_RING  (chan, (s->maxy << 16) | s->miny);
+    } else {
+       /* max = framebuffer size, min = 0 */
+       BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+       OUT_RING  (chan, nvc0->framebuffer.width << 16);
+       OUT_RING  (chan, nvc0->framebuffer.height << 16);
+    }
+}
+
+/* Emit the viewport transform of viewport 0: first the three translate
+ * components, then the three scale components.
+ */
+static void
+nvc0_validate_viewport(struct nvc0_context *nvc0)
+{
+    struct nouveau_channel *chan = nvc0->screen->base.channel;
+    unsigned axis;
+
+    BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+    for (axis = 0; axis < 3; ++axis)
+        OUT_RINGf(chan, nvc0->viewport.translate[axis]);
+
+    BEGIN_RING(chan, RING_3D(VIEWPORT_SCALE_X(0)), 3);
+    for (axis = 0; axis < 3; ++axis)
+        OUT_RINGf(chan, nvc0->viewport.scale[axis]);
+}
+
+/* Emit VIEW_VOLUME_CLIP_CTRL: 0x201a when depth clamping is requested,
+ * 0x0002 otherwise.
+ */
+static void
+nvc0_validate_clip(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   uint32_t ctrl = 0x0002;
+
+   if (nvc0->clip.depth_clamp)
+      ctrl = 0x201a;
+
+   BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+   OUT_RING  (chan, ctrl);
+}
+
+/* Copy the pre-built command words of the bound blend CSO to the channel. */
+static void
+nvc0_validate_blend(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   const int words = nvc0->blend->size;
+
+   WAIT_RING(chan, words);
+   OUT_RINGp(chan, nvc0->blend->state, words);
+}
+
+/* Copy the pre-built command words of the bound depth/stencil/alpha CSO to
+ * the channel.
+ */
+static void
+nvc0_validate_zsa(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   const int words = nvc0->zsa->size;
+
+   WAIT_RING(chan, words);
+   OUT_RINGp(chan, nvc0->zsa->state, words);
+}
+
+/* Copy the pre-built command words of the bound rasterizer CSO to the
+ * channel.
+ */
+static void
+nvc0_validate_rasterizer(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   const int words = nvc0->rast->size;
+
+   WAIT_RING(chan, words);
+   OUT_RINGp(chan, nvc0->rast->state, words);
+}
+
+/* Re-emit every constant buffer binding flagged in constbuf_dirty[].
+ *
+ * For each of the 5 stage slots the dirty bits are consumed lowest index
+ * first. An empty slot is unbound; otherwise the buffer is bound and its
+ * contents are pushed word by word through CB_DATA.
+ */
+static void
+nvc0_constbufs_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nouveau_bo *bo;
+   unsigned s;
+
+   for (s = 0; s < 5; ++s) {
+      struct nvc0_resource *res;
+      int i, j;
+
+      while (nvc0->constbuf_dirty[s]) {
+         unsigned offset = 0;
+         /* index of the lowest set dirty bit; clear it right away */
+         i = ffs(nvc0->constbuf_dirty[s]) - 1;
+         nvc0->constbuf_dirty[s] &= ~(1 << i);
+
+         res = nvc0_resource(nvc0->constbuf[s][i]);
+         if (!res) {
+            /* nothing bound: CB_BIND with the low bit clear */
+            BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
+            OUT_RING  (chan, (i << 4) | 0);
+            continue;
+         }
+
+         if (i == 0 && !nvc0_resource_mapped_by_gpu(&res->base)) {
+            /* slot 0 without GPU storage: use the screen's shared uniforms
+             * bo, at offset s << 16 (one 64 KiB window per stage slot) */
+            offset = s << 16;
+            bo = nvc0->screen->uniforms;
+         } else {
+            bo = res->bo;
+            nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_CONSTANT, res,
+                                     NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+         }
+
+         /* size (aligned to 0x100) and address, then bind to index i */
+         BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+         OUT_RING  (chan, align(res->base.width0, 0x100));
+         OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+         OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+         BEGIN_RING(chan, RING_3D(CB_BIND(s)), 1);
+         OUT_RING  (chan, (i << 4) | 1);
+
+         /* same size/address again plus a fourth word of 0, followed by the
+          * data words (presumably the upload position -- TODO confirm) */
+         BEGIN_RING(chan, RING_3D(CB_SIZE), 4);
+         OUT_RING  (chan, align(res->base.width0, 0x100));
+         OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+         OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+         OUT_RING  (chan, 0);
+         /* NOTE(review): the upload reads res->data for every bound buffer,
+          * including those taking the res->bo branch above -- confirm
+          * res->data is valid in that case. */
+        BEGIN_RING_NI(chan, RING_3D(CB_DATA(0)), res->base.width0 / 4);
+         for (j = 0; j < res->base.width0 / 4; ++j)
+            OUT_RING(chan, ((uint32_t *)res->data)[j]);
+      }
+   }
+}
+
+/* Table driving nvc0_state_validate(): 'func' is called when any bit of
+ * 'states' is set in the context's dirty mask. Entries are processed in
+ * array order.
+ */
+static struct state_validate {
+    void (*func)(struct nvc0_context *);
+    uint32_t states;
+} validate_list[] = {
+    { nvc0_validate_fb,            NVC0_NEW_FRAMEBUFFER },
+    { nvc0_validate_blend,         NVC0_NEW_BLEND },
+    { nvc0_validate_zsa,           NVC0_NEW_ZSA },
+    { nvc0_validate_rasterizer,    NVC0_NEW_RASTERIZER },
+    { nvc0_validate_blend_colour,  NVC0_NEW_BLEND_COLOUR },
+    { nvc0_validate_stencil_ref,   NVC0_NEW_STENCIL_REF },
+    { nvc0_validate_stipple,       NVC0_NEW_STIPPLE },
+    { nvc0_validate_scissor,       NVC0_NEW_SCISSOR | NVC0_NEW_FRAMEBUFFER |
+                                   NVC0_NEW_RASTERIZER },
+    { nvc0_validate_viewport,      NVC0_NEW_VIEWPORT },
+    { nvc0_validate_clip,          NVC0_NEW_CLIP },
+    { nvc0_vertprog_validate,      NVC0_NEW_VERTPROG },
+    { nvc0_tctlprog_validate,      NVC0_NEW_TCTLPROG },
+    { nvc0_tevlprog_validate,      NVC0_NEW_TEVLPROG },
+    { nvc0_gmtyprog_validate,      NVC0_NEW_GMTYPROG },
+    { nvc0_fragprog_validate,      NVC0_NEW_FRAGPROG },
+    { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
+    { nvc0_validate_textures,      NVC0_NEW_TEXTURES },
+    { nvc0_validate_samplers,      NVC0_NEW_SAMPLERS },
+    { nvc0_constbufs_validate,     NVC0_NEW_CONSTBUF }
+};
+/* number of entries in validate_list[] */
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+
+/* Bring the hardware state in line with the context.
+ *
+ * Marks 'nvc0' as the screen's current context. If anything is dirty, the
+ * channel is fired and every validate_list[] entry whose state mask
+ * intersects the dirty mask is run in table order, after which the mask is
+ * cleared. Buffer relocations are emitted in every case.
+ *
+ * Always returns TRUE.
+ */
+boolean
+nvc0_state_validate(struct nvc0_context *nvc0)
+{
+   struct state_validate *entry;
+   struct state_validate *const end = validate_list + validate_list_len;
+#if 0
+   if (nvc0->screen->cur_ctx != nvc0) /* FIXME: not everything is valid */
+      nvc0->dirty = 0xffffffff;
+#endif
+   nvc0->screen->cur_ctx = nvc0;
+
+   if (nvc0->dirty) {
+      FIRE_RING(nvc0->screen->base.channel);
+
+      /* the dirty mask is re-read for every entry, as handlers may touch it */
+      for (entry = validate_list; entry != end; ++entry) {
+         if (nvc0->dirty & entry->states)
+            entry->func(nvc0);
+      }
+      nvc0->dirty = 0;
+   }
+
+   nvc0_bufctx_emit_relocs(nvc0);
+
+   return TRUE;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
new file mode 100644 (file)
index 0000000..41084f3
--- /dev/null
@@ -0,0 +1,79 @@
+
+#ifndef __NVC0_STATEOBJ_H__
+#define __NVC0_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+
+/* Helpers for building a state object's command words. Each macro appends
+ * exactly one 32-bit word to (so)->state[] and post-increments (so)->size;
+ * no bounds check is done, so the state[] array must be large enough.
+ * 'm' is a method name without its NVC0_3D_ prefix.
+ */
+
+/* Append a header word: 0x2 in the top nibble, 's' at bit 16, the 3D
+ * subchannel at bit 13 and the method offset divided by 4 in the low bits.
+ * Presumably 's' is the count of data words that follow -- TODO confirm.
+ */
+#define SB_BEGIN_3D(so, m, s)                                                  \
+   (so)->state[(so)->size++] =                                                 \
+      (0x2 << 28) | ((s) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
+
+/* Append a single word with 0x8 in the top nibble that carries the value
+ * 'd' at bit 16 together with the method, i.e. no separate data word.
+ */
+#define SB_OUT_3D(so, m, d)                                                    \
+   (so)->state[(so)->size++] =                                                 \
+      (0x8 << 28) | ((d) << 16) | (NVC0_SUBCH_3D << 13) | ((NVC0_3D_##m) >> 2)
+
+/* Append the raw word 'u'. */
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+
+/* Blend state object: the gallium state plus command words for it. */
+struct nvc0_blend_stateobj {
+   struct pipe_blend_state pipe;
+   int size;            /* number of words used in state[] */
+   uint32_t state[72];  /* command words, filled through the SB_* macros */
+};
+
+/* Sampler state object; the opaque sampler CSO pointers are cast to this. */
+struct nvc0_tsc_entry {
+   int id;          /* presumably the slot in the screen's TSC table -- TODO confirm */
+   uint32_t tsc[8]; /* 8 words of sampler data */
+};
+
+/* Cast an opaque sampler state CSO pointer to the driver's TSC entry. */
+static INLINE struct nvc0_tsc_entry *
+nvc0_tsc_entry(void *hwcso)
+{
+   struct nvc0_tsc_entry *tsc = (struct nvc0_tsc_entry *)hwcso;
+
+   return tsc;
+}
+
+/* Sampler view object. 'pipe' must stay the first member: nvc0_tic_entry()
+ * converts a pipe_sampler_view pointer with a plain cast.
+ */
+struct nvc0_tic_entry {
+   struct pipe_sampler_view pipe;
+   int id;          /* presumably the slot in the screen's TIC table -- TODO confirm */
+   uint32_t tic[8]; /* 8 words of texture header data */
+};
+
+/* Cast a gallium sampler view to the driver's TIC entry that embeds it. */
+static INLINE struct nvc0_tic_entry *
+nvc0_tic_entry(struct pipe_sampler_view *view)
+{
+   struct nvc0_tic_entry *tic = (struct nvc0_tic_entry *)view;
+
+   return tic;
+}
+
+/* Rasterizer state object: the gallium state plus command words for it. */
+struct nvc0_rasterizer_stateobj {
+   struct pipe_rasterizer_state pipe;
+   int size;            /* number of words used in state[] */
+   uint32_t state[42];  /* command words, filled through the SB_* macros */
+};
+
+/* Depth/stencil/alpha state object: the gallium state plus command words. */
+struct nvc0_zsa_stateobj {
+   struct pipe_depth_stencil_alpha_state pipe;
+   int size;            /* number of words used in state[] */
+   uint32_t state[29];  /* command words, filled through the SB_* macros */
+};
+
+/* One vertex element: the gallium description plus one driver word. */
+struct nvc0_vertex_element {
+   struct pipe_vertex_element pipe;
+   uint32_t state; /* presumably the hardware attribute format word -- TODO confirm */
+};
+
+/* Vertex elements state object. element[] is declared with one entry but is
+ * the trailing member; presumably the object is allocated with room for
+ * num_elements entries -- TODO confirm at the allocation site.
+ */
+struct nvc0_vertex_stateobj {
+   struct translate *translate;
+   unsigned num_elements;
+   uint32_t instance_bits;
+   unsigned vtx_size;
+   unsigned vtx_per_packet_max;
+   struct nvc0_vertex_element element[1];
+};
+
+/* Transform feedback state, with per-buffer arrays of 4 entries.
+ * Will have to look up index -> location qualifier from nvc0_program.
+ */
+struct nvc0_tfb_state {
+   uint8_t varying_count[4];
+   uint32_t stride[4];
+   uint8_t varying_indices[1]; /* trailing array, declared with one entry */
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
new file mode 100644 (file)
index 0000000..b52b098
--- /dev/null
@@ -0,0 +1,361 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nv50_defs.xml.h"
+
+/* return TRUE for formats that can be converted among each other by NVC0_2D */
+static INLINE boolean
+nvc0_2d_format_faithful(enum pipe_format format)
+{
+   static const enum pipe_format faithful[] = {
+      PIPE_FORMAT_B8G8R8A8_UNORM,
+      PIPE_FORMAT_B8G8R8X8_UNORM,
+      PIPE_FORMAT_B8G8R8A8_SRGB,
+      PIPE_FORMAT_B8G8R8X8_SRGB,
+      PIPE_FORMAT_B5G6R5_UNORM,
+      PIPE_FORMAT_B5G5R5A1_UNORM,
+      PIPE_FORMAT_B10G10R10A2_UNORM,
+      PIPE_FORMAT_R8_UNORM,
+      PIPE_FORMAT_R32G32B32A32_FLOAT,
+      PIPE_FORMAT_R32G32B32_FLOAT
+   };
+   unsigned k;
+
+   for (k = 0; k < sizeof(faithful) / sizeof(faithful[0]); ++k) {
+      if (faithful[k] == format)
+         return TRUE;
+   }
+   return FALSE;
+}
+
+/* Pick the surface format to program into the 2D engine for 'format'.
+ *
+ * The render target format is used directly when the 2D engine supports
+ * it. Otherwise a stand-in with the same block size (1, 2 or 4 bytes) is
+ * chosen. Returns 0 when no suitable format exists.
+ */
+static INLINE uint8_t
+nvc0_2d_format(enum pipe_format format)
+{
+   /* Hardware values for color formats range from 0xc0 to 0xff,
+    * but the 2D engine doesn't support all of them:
+    * bit n of the mask corresponds to hardware format 0xc0 + n.
+    */
+   const uint64_t supported = 0xff0843e080608409ULL;
+   const uint8_t hw_format = nvc0_format_table[format].rt;
+   unsigned block_size;
+
+   if (hw_format >= 0xc0 && ((supported >> (hw_format - 0xc0)) & 1))
+      return hw_format;
+
+   block_size = util_format_get_blocksize(format);
+   if (block_size == 1)
+      return NV50_SURFACE_FORMAT_R8_UNORM;
+   if (block_size == 2)
+      return NV50_SURFACE_FORMAT_R16_UNORM;
+   if (block_size == 4)
+      return NV50_SURFACE_FORMAT_A8R8G8B8_UNORM;
+
+   return 0;
+}
+
+/* Program surface 'ps' as the 2D engine's destination (dst != 0) or source
+ * (dst == 0).
+ *
+ * The same method layout is used for both, relative to NVC0_2D_DST_FORMAT
+ * or NVC0_2D_SRC_FORMAT. A bo without tile_flags is set up with a pitch,
+ * otherwise the miptree level's tile mode is programmed.
+ *
+ * Returns 0 on success, 1 if nvc0_2d_format() has no format for the
+ * surface (an error is logged in that case).
+ */
+static int
+nvc0_surface_set(struct nvc0_screen *screen, struct pipe_surface *ps, int dst)
+{
+   struct nvc0_miptree *mt = nvc0_miptree(ps->texture);
+   struct nouveau_channel *chan = screen->base.channel;
+   struct nouveau_bo *bo = nvc0_miptree(ps->texture)->base.bo;
+   int format, mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
+   /* the destination is written, the source only read */
+   int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
+
+   format = nvc0_2d_format(ps->format);
+   if (!format) {
+      NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+                  util_format_name(ps->format));
+      return 1;
+   }
+
+   if (!bo->tile_flags) {
+      /* no tile flags: format, 1, then pitch / size / address */
+      BEGIN_RING(chan, RING_2D_(mthd), 2);
+      OUT_RING  (chan, format);
+      OUT_RING  (chan, 1);
+      BEGIN_RING(chan, RING_2D_(mthd + 0x14), 5);
+      OUT_RING  (chan, mt->level[ps->level].pitch);
+      OUT_RING  (chan, ps->width);
+      OUT_RING  (chan, ps->height);
+      OUT_RELOCh(chan, bo, ps->offset, flags);
+      OUT_RELOCl(chan, bo, ps->offset, flags);
+   } else {
+      /* tiled: format, 0, tile mode, 1, 0, then size / address */
+      BEGIN_RING(chan, RING_2D_(mthd), 5);
+      OUT_RING  (chan, format);
+      OUT_RING  (chan, 0);
+      OUT_RING  (chan, mt->level[ps->level].tile_mode);
+      OUT_RING  (chan, 1);
+      OUT_RING  (chan, 0);
+      BEGIN_RING(chan, RING_2D_(mthd + 0x18), 4);
+      OUT_RING  (chan, ps->width);
+      OUT_RING  (chan, ps->height);
+      OUT_RELOCh(chan, bo, ps->offset, flags);
+      OUT_RELOCl(chan, bo, ps->offset, flags);
+   }
+   /* disabled clip rectangle setup; note it refers to 'surf', which does
+    * not exist in this function */
+#if 0
+   if (dst) {
+      BEGIN_RING(chan, RING_2D_(NVC0_2D_CLIP_X), 4);
+      OUT_RING  (chan, 0);
+      OUT_RING  (chan, 0);
+      OUT_RING  (chan, surf->width);
+      OUT_RING  (chan, surf->height);
+   }
+#endif
+   return 0;
+}
+
+/* Copy a w x h rectangle from (sx, sy) in 'src' to (dx, dy) in 'dst' with
+ * the 2D engine.
+ *
+ * Channel space is reserved with MARK_RING first, then both surfaces are
+ * programmed and the blit is emitted. Returns 0 on success, or the
+ * non-zero result of MARK_RING / nvc0_surface_set() on failure.
+ */
+static int
+nvc0_surface_do_copy(struct nvc0_screen *screen,
+                     struct pipe_surface *dst, int dx, int dy,
+                     struct pipe_surface *src, int sx, int sy,
+                     int w, int h)
+{
+   struct nouveau_channel *chan = screen->base.channel;
+   int ret;
+
+   ret = MARK_RING(chan, 2*16 + 32, 4);
+   if (ret)
+      return ret;
+
+   ret = nvc0_surface_set(screen, dst, 1);
+   if (ret)
+      return ret;
+
+   ret = nvc0_surface_set(screen, src, 0);
+   if (ret)
+      return ret;
+
+   /* 0/1 = CENTER/CORNER, 10/00 = POINT/BILINEAR */
+   BEGIN_RING(chan, RING_2D(BLIT_CONTROL), 1);
+   OUT_RING  (chan, 0);
+   /* destination rectangle */
+   BEGIN_RING(chan, RING_2D(BLIT_DST_X), 4);
+   OUT_RING  (chan, dx);
+   OUT_RING  (chan, dy);
+   OUT_RING  (chan, w);
+   OUT_RING  (chan, h);
+   /* two (fraction, integer) pairs of 0 and 1: a step of one source texel
+    * per destination pixel in each direction, i.e. no scaling */
+   BEGIN_RING(chan, RING_2D(BLIT_DU_DX_FRACT), 4);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, 1);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, 1);
+   /* source origin, again as (fraction, integer) pairs */
+   BEGIN_RING(chan, RING_2D(BLIT_SRC_X_FRACT), 4);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, sx);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, sy);
+
+   return 0;
+}
+
+/* Copy a width x height region between two resources.
+ *
+ * Temporary surfaces are created for the selected face/level/z of source
+ * and destination, the copy is done with the 2D engine, and the surfaces
+ * are released again. The formats must either match or both be "faithful"
+ * 2D formats (asserted).
+ *
+ * If either temporary surface cannot be created, the copy is skipped
+ * instead of dereferencing a NULL surface. The hook returns void, so a
+ * failure of nvc0_surface_do_copy() cannot be reported to the caller.
+ */
+static void
+nvc0_surface_copy(struct pipe_context *pipe,
+                 struct pipe_resource *dest, struct pipe_subresource subdst,
+                 unsigned destx, unsigned desty, unsigned destz,
+                 struct pipe_resource *src, struct pipe_subresource subsrc,
+                 unsigned srcx, unsigned srcy, unsigned srcz,
+                 unsigned width, unsigned height)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
+   struct pipe_surface *ps_dst, *ps_src;
+
+   assert((src->format == dest->format) ||
+          (nvc0_2d_format_faithful(src->format) &&
+           nvc0_2d_format_faithful(dest->format)));
+
+   ps_src = nvc0_miptree_surface_new(pipe->screen, src, subsrc.face,
+                                     subsrc.level, srcz, 0 /* bind flags */);
+   ps_dst = nvc0_miptree_surface_new(pipe->screen, dest, subdst.face,
+                                     subdst.level, destz, 0 /* bind flags */);
+
+   if (ps_src && ps_dst)
+      nvc0_surface_do_copy(screen, ps_dst, destx, desty, ps_src, srcx,
+                           srcy, width, height);
+
+   if (ps_src)
+      nvc0_miptree_surface_del(ps_src);
+   if (ps_dst)
+      nvc0_miptree_surface_del(ps_dst);
+}
+
+/* Clear a rectangle of colour surface 'dst' to 'rgba' with the 3D engine.
+ *
+ * 'dst' is temporarily programmed as render target 0 and viewport 0 is set
+ * to the clear rectangle. This overwrites the framebuffer state emitted by
+ * validation, so NVC0_NEW_FRAMEBUFFER is flagged to have it re-emitted.
+ * Nothing is cleared if MARK_RING fails; the clear colour has already been
+ * emitted by then.
+ */
+static void
+nvc0_clear_render_target(struct pipe_context *pipe,
+                        struct pipe_surface *dst,
+                        const float *rgba,
+                        unsigned dstx, unsigned dsty,
+                        unsigned width, unsigned height)
+{
+       /* named nv50 for historical reasons; this is the nvc0 context */
+       struct nvc0_context *nv50 = nvc0_context(pipe);
+       struct nvc0_screen *screen = nv50->screen;
+       struct nouveau_channel *chan = screen->base.channel;
+       struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
+       struct nouveau_bo *bo = mt->base.bo;
+
+       BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+       OUT_RINGf (chan, rgba[0]);
+       OUT_RINGf (chan, rgba[1]);
+       OUT_RINGf (chan, rgba[2]);
+       OUT_RINGf (chan, rgba[3]);
+
+       if (MARK_RING(chan, 18, 2))
+               return;
+
+       /* a single render target: 'dst', with the same 8-word layout as in
+        * nvc0_validate_fb() */
+       BEGIN_RING(chan, RING_3D(RT_CONTROL), 1);
+       OUT_RING  (chan, 1);
+       BEGIN_RING(chan, RING_3D(RT_ADDRESS_HIGH(0)), 8);
+       OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       OUT_RING  (chan, dst->width);
+       OUT_RING  (chan, dst->height);
+       OUT_RING  (chan, nvc0_format_table[dst->format].rt);
+       OUT_RING  (chan, mt->level[dst->level].tile_mode);
+       OUT_RING  (chan, 1);
+       OUT_RING  (chan, 0);
+
+       /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
+
+       /* restrict the clear to the requested rectangle: size in the upper
+        * 16 bits, origin in the lower 16 bits */
+       BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+       OUT_RING  (chan, (width << 16) | dstx);
+       OUT_RING  (chan, (height << 16) | dsty);
+
+       /* 0x3c: presumably the four colour channel bits of render target 0
+        * -- TODO confirm against the NVC0_3D_CLEAR_BUFFERS_* defines */
+       BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+       OUT_RING  (chan, 0x3c);
+
+       nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+/* Clear a rectangle of a depth/stencil surface.
+ *
+ * @dst:            surface to clear; it is bound as the zeta buffer
+ * @clear_flags:    PIPE_CLEAR_DEPTH and/or PIPE_CLEAR_STENCIL
+ * @depth:          depth clear value (only emitted if PIPE_CLEAR_DEPTH is set)
+ * @stencil:        stencil clear value, only the low 8 bits are used
+ * @dstx/@dsty:     origin of the rectangle
+ * @width/@height:  extent of the rectangle
+ *
+ * The function overrides the zeta buffer and viewport 0 on the 3D engine, so
+ * it flags NVC0_NEW_FRAMEBUFFER as dirty before returning.
+ */
+static void
+nvc0_clear_depth_stencil(struct pipe_context *pipe,
+                         struct pipe_surface *dst,
+                         unsigned clear_flags,
+                         double depth,
+                         unsigned stencil,
+                         unsigned dstx, unsigned dsty,
+                         unsigned width, unsigned height)
+{
+       struct nvc0_context *nv50 = nvc0_context(pipe);
+       struct nvc0_screen *screen = nv50->screen;
+       struct nouveau_channel *chan = screen->base.channel;
+       struct nvc0_miptree *mt = nvc0_miptree(dst->texture);
+       struct nouveau_bo *bo = mt->base.bo;
+       uint32_t mode = 0; /* CLEAR_BUFFERS mask, built from clear_flags */
+
+       if (clear_flags & PIPE_CLEAR_DEPTH) {
+               BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
+               OUT_RINGf (chan, depth);
+               mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+       }
+
+       if (clear_flags & PIPE_CLEAR_STENCIL) {
+               BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
+               OUT_RING  (chan, stencil & 0xff);
+               mode |= NVC0_3D_CLEAR_BUFFERS_S;
+       }
+
+       /* presumably reserves ring space (17 words, 2 relocs) - TODO confirm.
+        * NOTE(review): the clear values above are already in the ring if this
+        * bails out.
+        */
+       if (MARK_RING(chan, 17, 2))
+               return;
+
+       /* bind dst as the depth/stencil buffer */
+       BEGIN_RING(chan, RING_3D(ZETA_ADDRESS_HIGH), 5);
+       OUT_RELOCh(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       OUT_RELOCl(chan, bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+       OUT_RING  (chan, nvc0_format_table[dst->format].rt);
+       OUT_RING  (chan, mt->level[dst->level].tile_mode);
+       OUT_RING  (chan, 0);
+       BEGIN_RING(chan, RING_3D(ZETA_ENABLE), 1);
+       OUT_RING  (chan, 1);
+       BEGIN_RING(chan, RING_3D(ZETA_HORIZ), 3);
+       OUT_RING  (chan, dst->width);
+       OUT_RING  (chan, dst->height);
+       OUT_RING  (chan, (1 << 16) | 1);
+
+       /* restrict the clear to the requested rectangle via viewport 0 */
+       BEGIN_RING(chan, RING_3D(VIEWPORT_HORIZ(0)), 2);
+       OUT_RING  (chan, (width << 16) | dstx);
+       OUT_RING  (chan, (height << 16) | dsty);
+
+       BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+       OUT_RING  (chan, mode);
+
+       nv50->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+/* Clear the buffers of the currently bound framebuffer.
+ *
+ * @buffers: mask of PIPE_CLEAR_COLOR / PIPE_CLEAR_DEPTH / PIPE_CLEAR_STENCIL
+ * @rgba:    colour clear value (four floats), used if PIPE_CLEAR_COLOR is set
+ * @depth:   depth clear value, used if PIPE_CLEAR_DEPTH is set
+ * @stencil: stencil clear value, only the low 8 bits are used
+ *
+ * Only the framebuffer state is validated before clearing; all other dirty
+ * bits are preserved for the next draw.
+ */
+void
+nvc0_clear(struct pipe_context *pipe, unsigned buffers,
+           const float *rgba, double depth, unsigned stencil)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+   unsigned i;
+   const unsigned dirty = nvc0->dirty;
+   uint32_t mode = 0;
+
+   /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
+   nvc0->dirty &= NVC0_NEW_FRAMEBUFFER;
+   if (!nvc0_state_validate(nvc0)) {
+      /* put back the bits masked off above, otherwise they would be lost */
+      nvc0->dirty |= dirty;
+      return;
+   }
+
+   if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+      BEGIN_RING(chan, RING_3D(CLEAR_COLOR(0)), 4);
+      OUT_RINGf (chan, rgba[0]);
+      OUT_RINGf (chan, rgba[1]);
+      OUT_RINGf (chan, rgba[2]);
+      OUT_RINGf (chan, rgba[3]);
+      mode =
+         NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G |
+         NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A;
+   }
+
+   if (buffers & PIPE_CLEAR_DEPTH) {
+      BEGIN_RING(chan, RING_3D(CLEAR_DEPTH), 1);
+      OUT_RING  (chan, fui(depth));
+      mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+   }
+
+   if (buffers & PIPE_CLEAR_STENCIL) {
+      BEGIN_RING(chan, RING_3D(CLEAR_STENCIL), 1);
+      OUT_RING  (chan, stencil & 0xff);
+      mode |= NVC0_3D_CLEAR_BUFFERS_S;
+   }
+
+   /* colour buffer 0 together with depth/stencil */
+   BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+   OUT_RING  (chan, mode);
+
+   /* Remaining colour buffers: the RT index goes into bits 6+. They must
+    * only be touched when a colour clear was actually requested, otherwise
+    * a depth/stencil-only clear would wipe them too.
+    */
+   if (buffers & PIPE_CLEAR_COLOR) {
+      for (i = 1; i < fb->nr_cbufs; i++) {
+         BEGIN_RING(chan, RING_3D(CLEAR_BUFFERS), 1);
+         OUT_RING  (chan, (i << 6) | 0x3c);
+      }
+   }
+
+   nvc0->dirty = dirty & ~NVC0_NEW_FRAMEBUFFER;
+}
+
+/* Hook the surface copy and clear entry points into the context's
+ * pipe_context function table.
+ */
+void
+nvc0_init_surface_functions(struct nvc0_context *nvc0)
+{
+       struct pipe_context *pipe = &nvc0->pipe;
+
+       pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
+       pipe->clear_render_target = nvc0_clear_render_target;
+       pipe->resource_copy_region = nvc0_surface_copy;
+}
+
+
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
new file mode 100644 (file)
index 0000000..c766b64
--- /dev/null
@@ -0,0 +1,251 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+#include "nv50_texture.xml.h"
+
+#include "util/u_format.h"
+
+/* Translate a gallium swizzle selector into the hardware component map value.
+ *
+ * @tc:  TIC word 0 of the format, holding the format's own R/G/B/A mapping
+ * @swz: PIPE_SWIZZLE_* selector
+ *
+ * For the four colour selectors the corresponding MAPx field is extracted
+ * from @tc; ONE and ZERO map to constants. Unknown selectors yield ZERO.
+ */
+static INLINE uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz)
+{
+   uint32_t field_mask, field_shift;
+
+   switch (swz) {
+   case PIPE_SWIZZLE_RED:
+      field_mask = NV50_TIC_0_MAPR__MASK;
+      field_shift = NV50_TIC_0_MAPR__SHIFT;
+      break;
+   case PIPE_SWIZZLE_GREEN:
+      field_mask = NV50_TIC_0_MAPG__MASK;
+      field_shift = NV50_TIC_0_MAPG__SHIFT;
+      break;
+   case PIPE_SWIZZLE_BLUE:
+      field_mask = NV50_TIC_0_MAPB__MASK;
+      field_shift = NV50_TIC_0_MAPB__SHIFT;
+      break;
+   case PIPE_SWIZZLE_ALPHA:
+      field_mask = NV50_TIC_0_MAPA__MASK;
+      field_shift = NV50_TIC_0_MAPA__SHIFT;
+      break;
+   case PIPE_SWIZZLE_ONE:
+      return NV50_TIC_MAP_ONE;
+   case PIPE_SWIZZLE_ZERO:
+   default:
+      return NV50_TIC_MAP_ZERO;
+   }
+
+   return (tc & field_mask) >> field_shift;
+}
+
+/* Create a sampler view and pre-build its 8-word texture image control
+ * (TIC) entry.
+ *
+ * @pipe:    context the view belongs to
+ * @texture: resource to sample from; the view takes its own reference
+ * @templ:   template providing format, swizzles and level range
+ *
+ * Returns the new view, or NULL if allocation fails or the resource target
+ * is not supported. On failure nothing is leaked. The TIC entry is not
+ * uploaded here: view->id is set to -1 and nvc0_validate_tic assigns a slot
+ * on first use.
+ */
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *pipe,
+                         struct pipe_resource *texture,
+                         const struct pipe_sampler_view *templ)
+{
+   const struct util_format_description *desc;
+   uint32_t *tic;
+   uint32_t swz[4];
+   struct nvc0_tic_entry *view;
+   struct nvc0_miptree *mt = nvc0_miptree(texture);
+
+   view = MALLOC_STRUCT(nvc0_tic_entry);
+   if (!view)
+      return NULL;
+
+   view->pipe = *templ;
+   view->pipe.reference.count = 1;
+   view->pipe.texture = NULL;
+   view->pipe.context = pipe;
+
+   view->id = -1; /* no TIC slot allocated yet */
+
+   pipe_resource_reference(&view->pipe.texture, texture);
+
+   tic = &view->tic[0];
+
+   /* NOTE(review): colorspace is taken from the resource format, not from
+    * the view format - confirm this is intended.
+    */
+   desc = util_format_description(mt->base.base.format);
+
+   /* TIC[0] */
+
+   tic[0] = nvc0_format_table[view->pipe.format].tic;
+
+   /* apply the view's swizzle on top of the format's own component map */
+   swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r);
+   swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);
+   swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b);
+   swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a);
+   tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+      (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+      (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+      (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+      (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+   /* low and high part of the GPU virtual address */
+   tic[1] = nouveau_bo_gpu_address(mt->base.bo);
+   tic[2] = nouveau_bo_gpu_address(mt->base.bo) >> 32;
+
+   tic[2] |= 0x10001000 | /* NV50_TIC_2_NO_BORDER */ 0x40000000;
+
+   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+      tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+   if (mt->base.base.target != PIPE_TEXTURE_RECT)
+      tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+   tic[2] |=
+      ((mt->base.bo->tile_mode & 0x0f0) << (22 - 4)) |
+      ((mt->base.bo->tile_mode & 0xf00) << (21 - 4));
+
+   switch (mt->base.base.target) {
+   case PIPE_TEXTURE_1D:
+      tic[2] |= NV50_TIC_2_TARGET_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+      tic[2] |= NV50_TIC_2_TARGET_2D;
+      break;
+   case PIPE_TEXTURE_RECT:
+      tic[2] |= NV50_TIC_2_TARGET_RECT;
+      break;
+   case PIPE_TEXTURE_3D:
+      tic[2] |= NV50_TIC_2_TARGET_3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+      tic[2] |= NV50_TIC_2_TARGET_CUBE;
+      break;
+   case PIPE_BUFFER:
+      tic[2] |= NV50_TIC_2_TARGET_BUFFER | /* NV50_TIC_2_LINEAR */ (1 << 18);
+      break; /* must not fall into the error path below */
+   default:
+      NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
+      /* drop the reference taken above and free the half-built view */
+      pipe_resource_reference(&view->pipe.texture, NULL);
+      FREE(view);
+      return NULL;
+   }
+
+   if (mt->base.base.target == PIPE_BUFFER)
+      tic[3] = mt->base.base.width0;
+   else
+      tic[3] = 0x00300000;
+
+   tic[4] = (1 << 31) | mt->base.base.width0;
+
+   tic[5] = mt->base.base.height0 & 0xffff;
+   tic[5] |= mt->base.base.depth0 << 16;
+   tic[5] |= mt->base.base.last_level << 28;
+
+   tic[6] = 0x03000000;
+
+   tic[7] = (view->pipe.last_level << 4) | view->pipe.first_level;
+
+   return &view->pipe;
+}
+
+/* Bind the sampler views of shader stage @s to the hardware.
+ *
+ * For every view slot in use: an empty slot is unbound; a view that has no
+ * TIC slot yet (id < 0) gets one allocated and its 32-byte entry uploaded
+ * into the screen's txc buffer; the slot is marked in the tic lock bitmap,
+ * the backing resource is added to the TEXTURES buffer context, and the
+ * binding is emitted. Slots that were bound by the previous validation but
+ * are beyond the current count are unbound afterwards.
+ *
+ * Returns TRUE if at least one new TIC entry was uploaded, in which case the
+ * caller emits a TIC_FLUSH.
+ */
+static boolean
+nvc0_validate_tic(struct nvc0_context *nvc0, int s)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   unsigned i;
+   boolean need_flush = FALSE;
+
+   for (i = 0; i < nvc0->num_textures[s]; ++i) {
+      struct nvc0_tic_entry *tic = nvc0_tic_entry(nvc0->textures[s][i]);
+      struct nvc0_resource *res;
+
+      if (!tic) {
+         /* bit 0 clear: unbind slot i */
+         BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+         OUT_RING  (chan, (i << 1) | 0);
+         continue;
+      }
+      res = &nvc0_miptree(tic->pipe.texture)->base;
+
+      if (tic->id < 0) {
+         /* NOTE(review): the result of nvc0_screen_tic_alloc is used without
+          * a failure check - confirm it cannot return a negative id.
+          */
+         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+         /* each TIC entry occupies 32 bytes in txc */
+         nvc0_m2mf_push_linear(nvc0, nvc0->screen->txc, NOUVEAU_BO_VRAM,
+                               tic->id * 32, 32, tic->tic);
+         need_flush = TRUE;
+      }
+      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+      nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TEXTURES, res,
+                               NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+
+      /* bit 0 set: bind TIC entry tic->id to slot i */
+      BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+      OUT_RING  (chan, (tic->id << 9) | (i << 1) | 1);
+   }
+   /* unbind slots left over from the previously validated state */
+   for (; i < nvc0->state.num_textures[s]; ++i) {
+      BEGIN_RING(chan, RING_3D(BIND_TIC(s)), 1);
+      OUT_RING  (chan, (i << 1) | 0);
+   }
+   nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+   return need_flush;
+}
+
+/* Re-validate the texture bindings of shader stages 0 and 4 and flush the
+ * TIC cache if any new entry had to be uploaded.
+ */
+void nvc0_validate_textures(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan;
+   boolean flush_s0, flush_s4;
+
+   nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES);
+
+   /* both stages are always processed, in this order */
+   flush_s0 = nvc0_validate_tic(nvc0, 0);
+   flush_s4 = nvc0_validate_tic(nvc0, 4);
+
+   if (!flush_s0 && !flush_s4)
+      return;
+
+   chan = nvc0->screen->base.channel;
+   BEGIN_RING(chan, RING_3D(TIC_FLUSH), 1);
+   OUT_RING  (chan, 0);
+}
+
+/* Bind the sampler states of shader stage @s to the hardware.
+ *
+ * Mirrors nvc0_validate_tic: empty slots are unbound; a sampler without a
+ * TSC slot (id < 0) gets one allocated and its 32-byte entry uploaded into
+ * the screen's txc buffer at offset 65536 + id * 32; the slot is marked in
+ * the tsc lock bitmap and the binding is emitted. Slots bound by the previous
+ * validation but beyond the current count are unbound afterwards.
+ *
+ * Returns TRUE if at least one new TSC entry was uploaded, in which case the
+ * caller emits a TSC_FLUSH.
+ */
+static boolean
+nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   unsigned i;
+   boolean need_flush = FALSE;
+
+   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+      struct nvc0_tsc_entry *tsc = nvc0_tsc_entry(nvc0->samplers[s][i]);
+
+      if (!tsc) {
+         /* bit 0 clear: unbind slot i */
+         BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+         OUT_RING  (chan, (i << 4) | 0);
+         continue;
+      }
+      if (tsc->id < 0) {
+         /* NOTE(review): the result of nvc0_screen_tsc_alloc is used without
+          * a failure check - confirm it cannot return a negative id.
+          */
+         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+         /* TSC entries live in txc after the first 64 KiB */
+         nvc0_m2mf_push_linear(nvc0, nvc0->screen->txc, NOUVEAU_BO_VRAM,
+                               65536 + tsc->id * 32, 32, tsc->tsc);
+         need_flush = TRUE;
+      }
+      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+      /* bit 0 set: bind TSC entry tsc->id to slot i */
+      BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+      OUT_RING  (chan, (tsc->id << 12) | (i << 4) | 1);
+   }
+   /* unbind slots left over from the previously validated state */
+   for (; i < nvc0->state.num_samplers[s]; ++i) {
+      BEGIN_RING(chan, RING_3D(BIND_TSC(s)), 1);
+      OUT_RING  (chan, (i << 4) | 0);
+   }
+   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+   return need_flush;
+}
+
+/* Re-validate the sampler bindings of shader stages 0 and 4 and flush the
+ * TSC cache if any new entry had to be uploaded.
+ */
+void nvc0_validate_samplers(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan;
+   boolean flush_s0, flush_s4;
+
+   /* both stages are always processed, in this order */
+   flush_s0 = nvc0_validate_tsc(nvc0, 0);
+   flush_s4 = nvc0_validate_tsc(nvc0, 4);
+
+   if (!flush_s0 && !flush_s4)
+      return;
+
+   chan = nvc0->screen->base.channel;
+   BEGIN_RING(chan, RING_3D(TSC_FLUSH), 1);
+   OUT_RING  (chan, 0);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
new file mode 100644 (file)
index 0000000..1b161f6
--- /dev/null
@@ -0,0 +1,1890 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <unistd.h>
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_dynarray.h"
+
+#include "nvc0_pc.h"
+#include "nvc0_program.h"
+
+/* Arbitrary internal limits. */
+#define BLD_MAX_TEMPS 64
+#define BLD_MAX_ADDRS 4
+#define BLD_MAX_PREDS 4
+#define BLD_MAX_IMMDS 128
+#define BLD_MAX_OUTPS PIPE_MAX_SHADER_OUTPUTS
+
+#define BLD_MAX_COND_NESTING 8
+#define BLD_MAX_LOOP_NESTING 4
+#define BLD_MAX_CALL_NESTING 2
+
+/* This structure represents one component of a TGSI register while it is
+ * being converted to SSA form.
+ */
+struct bld_register {
+   /* most recent SSA value stored to the register (see bld_store) */
+   struct nv_value *current;
+   /* collect all SSA values assigned to it */
+   struct util_dynarray vals;
+   /* 1 bit per loop level, indicates if used/defd, reset when loop ends */
+   uint16_t loop_use;
+   uint16_t loop_def;
+};
+
+/* Return a pointer to the @i-th entry of the register's value list.
+ * No bounds check is performed.
+ */
+static INLINE struct nv_value **
+bld_register_access(struct bld_register *reg, unsigned i)
+{
+   return util_dynarray_element(&reg->vals, struct nv_value *, i);
+}
+
+/* Append @val to the list of SSA values assigned to the register. */
+static INLINE void
+bld_register_add_val(struct bld_register *reg, struct nv_value *val)
+{
+   util_dynarray_append(&reg->vals, struct nv_value *, val);
+}
+
+/* Remove @val from the register's value list.
+ *
+ * The list is searched from the back, so the last occurrence is removed.
+ * Order is not preserved: the hole is filled with the final element.
+ *
+ * Returns TRUE if @val was found and removed, FALSE otherwise.
+ */
+static INLINE boolean
+bld_register_del_val(struct bld_register *reg, struct nv_value *val)
+{
+   unsigned i;
+
+   /* i is a 1-based position so that 0 can mean "not found" */
+   for (i = reg->vals.size / sizeof(struct nv_value *); i > 0; --i)
+      if (*bld_register_access(reg, i - 1) == val)
+         break;
+   if (!i)
+      return FALSE;
+
+   if (i != reg->vals.size / sizeof(struct nv_value *))
+      /* not the last entry: overwrite it with the popped last entry */
+      *bld_register_access(reg, i - 1) = util_dynarray_pop(&reg->vals,
+                                                           struct nv_value *);
+   else
+      /* last entry: just shrink the array */
+      reg->vals.size -= sizeof(struct nv_value *);
+
+   return TRUE;
+}
+
+/* State of one TGSI -> nv_pc translation run. */
+struct bld_context {
+   struct nvc0_translation_info *ti;
+
+   struct nv_pc *pc;
+   struct nv_basic_block *b;
+
+   /* one parser per call nesting level, indexed by call_lvl */
+   struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
+   int call_lvl;
+
+   /* per-level basic blocks of nested conditionals, indexed by cond_lvl */
+   struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
+   struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
+   struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
+   int cond_lvl;
+   /* per-level basic blocks of nested loops; loop_bb[loop_lvl - 1] is the
+    * header of the innermost loop (see bld_loop_phi)
+    */
+   struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
+   struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
+   int loop_lvl;
+
+   ubyte out_kind; /* CFG_EDGE_FORWARD, or FAKE in case of BREAK/CONT */
+
+   /* NOTE: bld_register_file relies on the order of these four arrays */
+   struct bld_register tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
+   struct bld_register avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
+   struct bld_register pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
+   struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */
+
+   /* bitmask with 4 bits (one per component) per output, set by STORE_OUTP */
+   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8];
+
+   /* value returned for zero immediates by bld_load_imm_{f32,u32} */
+   struct nv_value *zero;
+   struct nv_value *frag_coord[4];
+
+   /* wipe on new BB */
+   struct nv_value *saved_sysvals[4];
+   struct nv_value *saved_addr[4][2];
+   struct nv_value *saved_inputs[PIPE_MAX_SHADER_INPUTS][4];
+   /* cache of immediate values created by bld_imm_u32 */
+   struct nv_value *saved_immd[BLD_MAX_IMMDS];
+   uint num_immds;
+};
+
+/* Determine the nv register file of @reg from which of the context's
+ * register arrays it lives in. This relies on tvs, avs, pvs and ovs being
+ * laid out in that order inside struct bld_context: temporaries and address
+ * registers are GPRs, predicates use the predicate file, outputs use MEM_V.
+ */
+static INLINE ubyte
+bld_register_file(struct bld_context *bld, struct bld_register *reg)
+{
+   if (reg >= &bld->ovs[0][0])
+      return NV_FILE_MEM_V;
+
+   if (reg >= &bld->pvs[0][0])
+      return NV_FILE_PRED;
+
+   /* everything below pvs, i.e. tvs and avs */
+   return NV_FILE_GPR;
+}
+
+/* Read the current value of component @c of register @i in the array @regs
+ * and record that the register was used at the current loop nesting level.
+ */
+static INLINE struct nv_value *
+bld_fetch(struct bld_context *bld, struct bld_register *regs, int i, int c)
+{
+   struct bld_register *reg = &regs[i * 4 + c];
+
+   reg->loop_use |= 1 << bld->loop_lvl;
+
+   return reg->current;
+}
+
+static struct nv_value *
+bld_loop_phi(struct bld_context *, struct bld_register *, struct nv_value *);
+
+/* If a variable is defined in a loop without prior use, we don't need
+ * a phi in the loop header to account for backwards flow.
+ *
+ * However, if this variable is then also used outside the loop, we do
+ * need a phi after all. But we must not use this phi's def inside the
+ * loop, so we can eliminate the phi if it is unused later.
+ */
+/* Assign @val to component @c of register @i in the array @regs.
+ *
+ * When inside a loop and the register has been neither used nor defined at
+ * the current loop level yet, a loop phi is created first. The value then
+ * becomes the register's current value, is appended to its value list, and
+ * the register is flagged as defined at the current loop level.
+ */
+static INLINE void
+bld_store(struct bld_context *bld,
+          struct bld_register *regs, int i, int c, struct nv_value *val)
+{
+   struct bld_register *reg = regs + (i * 4 + c);
+   const uint16_t lvl_bit = 1 << bld->loop_lvl;
+
+   if (bld->loop_lvl) {
+      /* first access at this loop level ? */
+      if (!((reg->loop_def | reg->loop_use) & lvl_bit))
+         bld_loop_phi(bld, reg, val);
+   }
+
+   reg->current = val;
+   bld_register_add_val(reg, val);
+
+   reg->loop_def |= lvl_bit;
+}
+
+#define FETCH_TEMP(i, c)    bld_fetch(bld, &bld->tvs[0][0], i, c)
+#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
+#define FETCH_ADDR(i, c)    bld_fetch(bld, &bld->avs[0][0], i, c)
+#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
+#define FETCH_PRED(i, c)    bld_fetch(bld, &bld->pvs[0][0], i, c)
+#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
+#define STORE_OUTP(i, c, v)                                         \
+   do {                                                             \
+      bld_store(bld, &bld->ovs[0][0], i, c, (v));                   \
+      bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
+   } while (0)
+
+/* Reset the use/def tracking bit of loop level @lvl for all four components
+ * of the first @n registers in @regs.
+ */
+static INLINE void
+bld_clear_def_use(struct bld_register *regs, int n, int lvl)
+{
+   const uint16_t keep = ~(1 << lvl);
+   int k = n * 4;
+
+   while (k-- > 0) {
+      regs[k].loop_use &= keep;
+      regs[k].loop_def &= keep;
+   }
+}
+
+/* Debug aid: report a register that is read without having been written.
+ *
+ * @kind: 0 prints "is", non-zero prints "may be" (used uninitialized)
+ * @reg:  the register; its index and component are computed relative to
+ *        bld->tvs, so the message is only meaningful for temporaries
+ * @b:    basic block in which the read occurs
+ *
+ * Compiles to nothing unless NOUVEAU_DEBUG_BITS is defined.
+ */
+static INLINE void
+bld_warn_uninitialized(struct bld_context *bld, int kind,
+                       struct bld_register *reg, struct nv_basic_block *b)
+{
+#ifdef NOUVEAU_DEBUG_BITS
+   long i = (reg - &bld->tvs[0][0]) / 4;
+   long c = (reg - &bld->tvs[0][0]) & 3;
+
+   /* 'x' + c gives x, y, z for 0..2; component 3 must map to 'w' = 'x' - 1 */
+   if (c == 3)
+      c = -1;
+   debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
+                i, (int)('x' + c), kind ? "may be" : "is", b->id);
+#endif
+}
+
+/* Make @value the @c-th definition of instruction @i and link the value
+ * back to its defining instruction. Returns @value for convenience.
+ */
+static INLINE struct nv_value *
+bld_def(struct nv_instruction *i, int c, struct nv_value *value)
+{
+   i->def[c] = value;
+   value->insn = i;
+   return value;
+}
+
+/* Find a value of @reg whose defining instruction is in basic block @b.
+ *
+ * The register's current value is preferred; otherwise the value list is
+ * scanned front to back and the first match is returned. Returns NULL if
+ * the register has no value defined in @b.
+ */
+static INLINE struct nv_value *
+find_by_bb(struct bld_register *reg, struct nv_basic_block *b)
+{
+   int i;
+
+   if (reg->current && reg->current->insn->bb == b)
+      return reg->current;
+
+   for (i = 0; i < reg->vals.size / sizeof(struct nv_value *); ++i)
+      if ((*bld_register_access(reg, i))->insn->bb == b)
+         return *bld_register_access(reg, i);
+   return NULL;
+}
+
+/* Fetch value from register that was defined in the specified BB,
+ * or search for first definitions in all of its predecessors.
+ *
+ * @vals: output array the found values are appended to (duplicates skipped)
+ * @n:    in/out number of entries in @vals
+ *
+ * The search recurses through the incoming edges of @b, skipping edges for
+ * which IS_WALL_EDGE is true, and stops on each path at the first block
+ * that holds a definition.
+ */
+static void
+fetch_by_bb(struct bld_register *reg,
+            struct nv_value **vals, int *n,
+            struct nv_basic_block *b)
+{
+   int i;
+   struct nv_value *val;
+
+   assert(*n < 16); /* size of the vals[] array in bld_phi */
+
+   val = find_by_bb(reg, b);
+   if (val) {
+      for (i = 0; i < *n; ++i)
+         if (vals[i] == val)
+            return;
+      vals[(*n)++] = val;
+      return;
+   }
+   for (i = 0; i < b->num_in; ++i)
+      if (!IS_WALL_EDGE(b->in_kind[i]))
+         fetch_by_bb(reg, vals, n, b->in[i]);
+}
+
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u);
+
+/* Emit an NV_OP_UNDEF instruction and return its result, a new 4-byte value
+ * in register file @file. Used as a placeholder definition for registers
+ * that are read before being written.
+ */
+static INLINE struct nv_value *
+bld_undef(struct bld_context *bld, ubyte file)
+{
+   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);
+
+   return bld_def(nvi, 0, new_value(bld->pc, file, 4));
+}
+
+/* Determine the value of @reg that is live at the start of basic block @b,
+ * inserting phi functions where several definitions reach @b.
+ *
+ * Returns:
+ *  - NULL if no definition reaches @b (a warning is printed in debug builds),
+ *  - the single reaching definition if there is only one,
+ *  - otherwise the result of a new NV_OP_PHI placed in @b.
+ *
+ * Side effect: bld->pc->current_block is changed (it is set to @b before the
+ * phi is created); callers that care must restore it.
+ */
+static struct nv_value *
+bld_phi(struct bld_context *bld, struct nv_basic_block *b,
+        struct bld_register *reg)
+{
+   struct nv_basic_block *in;
+   struct nv_value *vals[16] = { NULL };
+   struct nv_value *val;
+   struct nv_instruction *phi;
+   int i, j, n;
+
+   /* Each iteration re-collects the reaching definitions; the loop repeats
+    * whenever a helper definition (undef or nested phi) had to be added.
+    */
+   do {
+      i = n = 0;
+      fetch_by_bb(reg, vals, &n, b);
+
+      if (!n) {
+         bld_warn_uninitialized(bld, 0, reg, b);
+         return NULL;
+      }
+
+      if (n == 1) {
+         /* a single definition that dominates b can be used directly */
+         if (nvc0_bblock_dominated_by(b, vals[0]->insn->bb))
+            break;
+
+         bld_warn_uninitialized(bld, 1, reg, b);
+
+         /* back-tracking to insert missing value of other path */
+         in = b;
+         while (in->in[0]) {
+            if (in->num_in == 1) {
+               in = in->in[0];
+            } else {
+               if (!nvc0_bblock_reachable_by(in->in[0], vals[0]->insn->bb, b))
+                  in = in->in[0];
+               else
+               if (!nvc0_bblock_reachable_by(in->in[1], vals[0]->insn->bb, b))
+                  in = in->in[1];
+               else
+                  in = in->in[0];
+            }
+         }
+         /* the undef is emitted into the block the walk ended in */
+         bld->pc->current_block = in;
+
+         /* should make this a no-op */
+         bld_register_add_val(reg, bld_undef(bld, vals[0]->reg.file));
+         continue;
+      }
+
+      for (i = 0; i < n; ++i) {
+         /* if value dominates b, continue to the redefinitions */
+         if (nvc0_bblock_dominated_by(b, vals[i]->insn->bb))
+            continue;
+
+         /* if value dominates any in-block, b should be the dom frontier */
+         for (j = 0; j < b->num_in; ++j)
+            if (nvc0_bblock_dominated_by(b->in[j], vals[i]->insn->bb))
+               break;
+         /* otherwise, find the dominance frontier and put the phi there */
+         if (j == b->num_in) {
+            in = nvc0_bblock_dom_frontier(vals[i]->insn->bb);
+            val = bld_phi(bld, in, reg);
+            bld_register_add_val(reg, val);
+            break;
+         }
+      }
+   } while(i < n); /* i < n means a break above requested another round */
+
+   bld->pc->current_block = b;
+
+   if (n == 1)
+      return vals[0];
+
+   phi = new_instruction(bld->pc, NV_OP_PHI);
+
+   bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.size));
+   for (i = 0; i < n; ++i)
+      nv_reference(bld->pc, phi, i, vals[i]);
+
+   return phi->def[0];
+}
+
+/* Insert a phi function in the loop header.
+ * For nested loops, we need to insert phi functions in all the outer
+ * loop headers if they don't have one yet.
+ *
+ * @def: redefinition from inside loop, or NULL if to be replaced later
+ *
+ * The phi's first source is the value from before the loop (an undef is
+ * created if there is none), its second source is @def, or the phi's own
+ * result as a placeholder when @def is NULL. The phi's target field is
+ * abused to remember @reg until bld_loop_end processes the loop header.
+ *
+ * Returns the value defined by the new phi. bld->pc->current_block is
+ * restored before returning.
+ */
+static struct nv_value *
+bld_loop_phi(struct bld_context *bld, struct bld_register *reg,
+             struct nv_value *def)
+{
+   struct nv_instruction *phi;
+   struct nv_basic_block *bb = bld->pc->current_block;
+   struct nv_value *val = NULL;
+
+   /* temporarily step out one loop level to handle the enclosing loop */
+   if (bld->loop_lvl > 1) {
+      --bld->loop_lvl;
+      if (!((reg->loop_def | reg->loop_use) & (1 << bld->loop_lvl)))
+         val = bld_loop_phi(bld, reg, NULL);
+      ++bld->loop_lvl;
+   }
+
+   if (!val)
+      val = bld_phi(bld, bld->pc->current_block, reg); /* old definition */
+   if (!val) {
+      /* no prior definition at all: create an undef in the loop header's
+       * first predecessor
+       */
+      bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
+      val = bld_undef(bld, bld_register_file(bld, reg));
+   }
+
+   /* the phi itself goes into the header of the innermost loop */
+   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];
+
+   phi = new_instruction(bld->pc, NV_OP_PHI);
+
+   bld_def(phi, 0, new_value_like(bld->pc, val));
+   if (!def)
+      def = phi->def[0];
+
+   bld_register_add_val(reg, phi->def[0]);
+
+   phi->target = (struct nv_basic_block *)reg; /* cheat */
+
+   nv_reference(bld->pc, phi, 0, val);
+   nv_reference(bld->pc, phi, 1, def);
+
+   bld->pc->current_block = bb;
+
+   return phi->def[0];
+}
+
+/* Fetch the value of @reg that is live in the current basic block, looking
+ * through predecessor blocks if necessary, and mark the register as used at
+ * the current loop level.
+ *
+ * Returns a loop phi result, or whatever bld_phi returns (which may be NULL
+ * if the register was never defined).
+ */
+static INLINE struct nv_value *
+bld_fetch_global(struct bld_context *bld, struct bld_register *reg)
+{
+   const uint16_t m = 1 << bld->loop_lvl;
+   const uint16_t use = reg->loop_use; /* state before marking it used */
+
+   reg->loop_use |= m;
+
+   /* If neither used nor def'd inside the loop, build a phi in foresight,
+    * so we don't have to replace stuff later on, which requires tracking.
+    */
+   if (bld->loop_lvl && !((use | reg->loop_def) & m))
+      return bld_loop_phi(bld, reg, NULL);
+
+   return bld_phi(bld, bld->pc->current_block, reg);
+}
+
+/* Return an immediate value holding the 32-bit pattern @u.
+ *
+ * Immediates are cached in bld->saved_immd; an existing entry with the same
+ * bit pattern is reused, otherwise a new NV_FILE_IMM value is created.
+ *
+ * NOTE(review): the BLD_MAX_IMMDS limit is only enforced by an assert, so a
+ * build without assertions would write past saved_immd when it is full.
+ */
+static INLINE struct nv_value *
+bld_imm_u32(struct bld_context *bld, uint32_t u)
+{
+   int i;
+   unsigned n = bld->num_immds;
+
+   for (i = 0; i < n; ++i)
+      if (bld->saved_immd[i]->reg.imm.u32 == u)
+         return bld->saved_immd[i];
+
+   assert(n < BLD_MAX_IMMDS);
+   bld->num_immds++;
+
+   bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, 4);
+   bld->saved_immd[n]->reg.imm.u32 = u;
+   return bld->saved_immd[n];
+}
+
+static void
+bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
+                  struct nv_value *);
+
+/* Replace the source of the phi in the loop header by the last assignment,
+ * or eliminate the phi function if there is no assignment inside the loop.
+ *
+ * Redundancy situation 1 - (used) but (not redefined) value:
+ *  %3 = phi %0, %3 = %3 is used
+ *  %3 = phi %0, %4 = is new definition
+ *
+ * Redundancy situation 2 - (not used) but (redefined) value:
+ *  %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
+ *
+ * @bb: the loop header block whose phis are to be finalized
+ *
+ * Each phi in @bb is expected to carry its bld_register in the target field,
+ * as set up by bld_loop_phi; the field is cleared here.
+ */
+static void
+bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
+{
+   struct nv_basic_block *save = bld->pc->current_block;
+   struct nv_instruction *phi, *next;
+   struct nv_value *val;
+   struct bld_register *reg;
+   int i, s, n;
+
+   /* next is fetched up front because the phi may be deleted below */
+   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
+      next = phi->next;
+
+      reg = (struct bld_register *)phi->target;
+      phi->target = NULL;
+
+      /* add the value arriving over each back edge as a further source,
+       * starting at source slot 1, unless the phi already references it
+       */
+      for (s = 1, n = 0; n < bb->num_in; ++n) {
+         if (bb->in_kind[n] != CFG_EDGE_BACK)
+            continue;
+
+         assert(s < 4);
+         bld->pc->current_block = bb->in[n];
+         val = bld_fetch_global(bld, reg);
+
+         for (i = 0; i < 4; ++i)
+            if (phi->src[i] && phi->src[i]->value == val)
+               break;
+         if (i == 4)
+            nv_reference(bld->pc, phi, s++, val);
+      }
+      bld->pc->current_block = save;
+
+      /* s now selects the source that replaces a redundant phi */
+      if (phi->src[0]->value == phi->def[0] ||
+          phi->src[0]->value == phi->src[1]->value)
+         s = 1;
+      else
+      if (phi->src[1]->value == phi->def[0])
+         s = 0;
+      else
+         continue; /* phi is needed, keep it */
+
+      /* NOTE(review): s is 0 or 1 here, so this condition is always true */
+      if (s >= 0) {
+         /* eliminate the phi */
+         bld_register_del_val(reg, phi->def[0]);
+
+         ++bld->pc->pass_seq;
+         bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);
+
+         nvc0_insn_delete(phi);
+      }
+   }
+}
+
+/* Return an immediate value holding the bit pattern of the float @f. */
+static INLINE struct nv_value *
+bld_imm_f32(struct bld_context *bld, float f)
+{
+   return bld_imm_u32(bld, fui(f));
+}
+
+/* Emit a one-source instruction with the given opcode.
+ * Returns its result, a new GPR value of the same size as @src0.
+ */
+static struct nv_value *
+bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
+{
+   struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+   nv_reference(bld->pc, insn, 0, src0);
+   
+   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+/* Emit a two-source instruction with the given opcode.
+ * Returns its result, a new GPR value of the same size as @src0.
+ */
+static struct nv_value *
+bld_insn_2(struct bld_context *bld, uint opcode,
+           struct nv_value *src0, struct nv_value *src1)
+{
+   struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+   nv_reference(bld->pc, insn, 0, src0);
+   nv_reference(bld->pc, insn, 1, src1);
+
+   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+/* Emit a three-source instruction with the given opcode.
+ * Returns its result, a new GPR value of the same size as @src0.
+ */
+static struct nv_value *
+bld_insn_3(struct bld_context *bld, uint opcode,
+           struct nv_value *src0, struct nv_value *src1,
+           struct nv_value *src2)
+{
+   struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+   nv_reference(bld->pc, insn, 0, src0);
+   nv_reference(bld->pc, insn, 1, src1);
+   nv_reference(bld->pc, insn, 2, src2);
+
+   return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.size));
+}
+
+/* Attach @val as source @s of @nvi and record that slot as the
+ * instruction's predicate.
+ */
+static INLINE void
+bld_src_predicate(struct bld_context *bld,
+                  struct nv_instruction *nvi, int s, struct nv_value *val)
+{
+   nvi->predicate = s;
+   nv_reference(bld->pc, nvi, s, val);
+}
+
+/* Attach @val as source @s of @nvi and record that slot as the
+ * instruction's indirect (address) operand.
+ */
+static INLINE void
+bld_src_pointer(struct bld_context *bld,
+                struct nv_instruction *nvi, int s, struct nv_value *val)
+{
+   nvi->indirect = s;
+   nv_reference(bld->pc, nvi, s, val);
+}
+
+/* Emit a store of @val to local memory.
+ *
+ * @ptr:  value attached as source 1 of the store
+ * @ofst: offset in 32-bit units; converted to a byte offset
+ * @val:  value to store, attached as source 2
+ */
+static void
+bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
+               struct nv_value *val)
+{
+   struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_ST);
+   struct nv_value *loc;
+
+   loc = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
+
+   /* The byte offset of a memory operand goes into reg.address, exactly as
+    * bld_lmem_load does it; reg.id would leave the address at its default
+    * and make loads and stores disagree about the location.
+    */
+   loc->reg.address = ofst * 4;
+
+   nv_reference(bld->pc, insn, 0, loc);
+   nv_reference(bld->pc, insn, 1, ptr);
+   nv_reference(bld->pc, insn, 2, val);
+}
+
+/* Emit a load from local memory and return the loaded value.
+ *
+ * @ptr:  value passed as second source of the load
+ * @ofst: offset in 32-bit units; converted to a byte address
+ */
+static struct nv_value *
+bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
+{
+   struct nv_value *mem;
+
+   mem = new_value(bld->pc, NV_FILE_MEM_L, nv_type_sizeof(NV_TYPE_U32));
+   mem->reg.address = ofst * 4;
+
+   return bld_insn_2(bld, NV_OP_LD, mem, ptr);
+}
+
+/* Emit code computing @x raised to the power @e as exp2(@e * log2(@x)).
+ * Returns the result value.
+ */
+static struct nv_value *
+bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
+{
+   struct nv_value *val;
+
+   val = bld_insn_1(bld, NV_OP_LG2, x);
+   val = bld_insn_2(bld, NV_OP_MUL_F32, e, val);
+
+   /* EX2 is preceded by a PREEX2 step on its operand */
+   val = bld_insn_1(bld, NV_OP_PREEX2, val);
+   val = bld_insn_1(bld, NV_OP_EX2, val);
+
+   return val;
+}
+
+/* Return a value holding the float constant @f: the shared bld->zero for
+ * zero (this comparison also matches -0.0f), otherwise the result of a MOV
+ * from an immediate.
+ */
+static INLINE struct nv_value *
+bld_load_imm_f32(struct bld_context *bld, float f)
+{
+   if (f == 0.0f)
+      return bld->zero;
+   return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
+}
+
+/* Return a value holding the integer constant @u: the shared bld->zero for
+ * zero, otherwise the result of a MOV from an immediate.
+ */
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u)
+{
+   if (u == 0)
+      return bld->zero;
+   return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
+}
+
+/* Emit a comparison @op of @src0 and @src1 with condition code @cc.
+ * The result is retyped to a 1-byte value in the predicate file; only the
+ * low 4 bits of @cc are stored in the instruction.
+ */
+static INLINE struct nv_value *
+bld_setp(struct bld_context *bld, uint op, uint8_t cc,
+         struct nv_value *src0, struct nv_value *src1)
+{
+   struct nv_value *val = bld_insn_2(bld, op, src0, src1);
+
+   /* bld_insn_2 created a GPR result; turn it into a predicate */
+   val->reg.file = NV_FILE_PRED;
+   val->reg.size = 1;
+   val->insn->set_cond = cc & 0xf;
+   return val;
+}
+
+/* Emit a type conversion of @src.
+ * @dt: destination type, @st: source type (stored in ext.cvt.d / ext.cvt.s)
+ */
+static INLINE struct nv_value *
+bld_cvt(struct bld_context *bld, uint8_t dt, uint8_t st, struct nv_value *src)
+{
+   struct nv_value *val = bld_insn_1(bld, NV_OP_CVT, src);
+   val->insn->ext.cvt.d = dt;
+   val->insn->ext.cvt.s = st;
+   return val;
+}
+
+/* Emit a conditional kill: a predicate "@src < 0" is computed and an
+ * NV_OP_KIL instruction predicated on it is emitted. The KIL is marked
+ * fixed.
+ */
+static void
+bld_kil(struct bld_context *bld, struct nv_value *src)
+{
+   struct nv_instruction *nvi;
+
+   src = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src, bld->zero);
+
+   nvi = new_instruction(bld->pc, NV_OP_KIL);
+   nvi->fixed = 1;
+
+   bld_src_predicate(bld, nvi, 0, src);
+}
+
+/* Emit a control flow instruction that terminates the current block.
+ *
+ * @opcode:     flow opcode to emit
+ * @src:        optional predicate value (NULL for unconditional)
+ * @target:     basic block stored as the instruction's target
+ * @reconverge: if set, a fixed NV_OP_JOINAT is emitted before the flow
+ *              instruction
+ */
+static void
+bld_flow(struct bld_context *bld, uint opcode,
+         struct nv_value *src, struct nv_basic_block *target,
+         boolean reconverge)
+{
+   struct nv_instruction *nvi;
+
+   if (reconverge)
+      new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;
+
+   nvi = new_instruction(bld->pc, opcode);
+   nvi->target = target;
+   nvi->terminator = 1;
+   if (src)
+      bld_src_predicate(bld, nvi, 0, src);
+}
+
+/* Map a TGSI comparison opcode to the NV_CC_* condition code it tests.
+ *
+ * Float, signed and unsigned variants of the same relation share a code,
+ * except that float SNE additionally sets NV_CC_U.  Unknown opcodes assert
+ * and yield NV_CC_FL.
+ */
+static ubyte
+translate_setcc(unsigned opcode)
+{
+   ubyte cc = NV_CC_FL;
+
+   switch (opcode) {
+   case TGSI_OPCODE_SLT:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_USLT:
+      cc = NV_CC_LT;
+      break;
+   case TGSI_OPCODE_SGE:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_USGE:
+      cc = NV_CC_GE;
+      break;
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_USEQ:
+      cc = NV_CC_EQ;
+      break;
+   case TGSI_OPCODE_SGT:
+      cc = NV_CC_GT;
+      break;
+   case TGSI_OPCODE_SLE:
+      cc = NV_CC_LE;
+      break;
+   case TGSI_OPCODE_SNE:
+      cc = NV_CC_NE | NV_CC_U;
+      break;
+   case TGSI_OPCODE_USNE:
+      cc = NV_CC_NE;
+      break;
+   case TGSI_OPCODE_STR:
+      cc = NV_CC_TR;
+      break;
+   case TGSI_OPCODE_SFL:
+      cc = NV_CC_FL;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return cc;
+}
+
+/* Map a TGSI opcode to the NV_OP_* opcode used when it translates 1:1.
+ *
+ * Opcodes without an entry here yield NV_OP_NOP.
+ */
+static uint
+translate_opcode(uint opcode)
+{
+   switch (opcode) {
+   /* float arithmetic */
+   case TGSI_OPCODE_ABS: return NV_OP_ABS_F32;
+   case TGSI_OPCODE_ADD: return NV_OP_ADD_F32;
+   case TGSI_OPCODE_SUB: return NV_OP_SUB_F32;
+   case TGSI_OPCODE_MUL: return NV_OP_MUL_F32;
+   case TGSI_OPCODE_MAD: return NV_OP_MAD_F32;
+   case TGSI_OPCODE_MIN: return NV_OP_MIN_F32;
+   case TGSI_OPCODE_MAX: return NV_OP_MAX_F32;
+
+   /* rounding */
+   case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
+   case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
+   case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
+
+   /* transcendentals */
+   case TGSI_OPCODE_EX2: return NV_OP_EX2;
+   case TGSI_OPCODE_LG2: return NV_OP_LG2;
+   case TGSI_OPCODE_RCP: return NV_OP_RCP;
+   case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
+   case TGSI_OPCODE_COS: return NV_OP_COS;
+   case TGSI_OPCODE_SIN: return NV_OP_SIN;
+
+   /* derivatives */
+   case TGSI_OPCODE_DDX: return NV_OP_DFDX;
+   case TGSI_OPCODE_DDY: return NV_OP_DFDY;
+
+   /* type conversion */
+   case TGSI_OPCODE_F2I:
+   case TGSI_OPCODE_F2U:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_U2F: return NV_OP_CVT;
+
+   /* integer arithmetic */
+   case TGSI_OPCODE_UADD: return NV_OP_ADD_B32;
+   case TGSI_OPCODE_UMUL: return NV_OP_MUL_B32;
+   case TGSI_OPCODE_INEG: return NV_OP_NEG_S32;
+   case TGSI_OPCODE_IMIN: return NV_OP_MIN_S32;
+   case TGSI_OPCODE_IMAX: return NV_OP_MAX_S32;
+   case TGSI_OPCODE_UMIN: return NV_OP_MIN_U32;
+   case TGSI_OPCODE_UMAX: return NV_OP_MAX_U32;
+   case TGSI_OPCODE_SAD: return NV_OP_SAD;
+
+   /* bitwise and shifts */
+   case TGSI_OPCODE_AND: return NV_OP_AND;
+   case TGSI_OPCODE_OR: return NV_OP_OR;
+   case TGSI_OPCODE_XOR: return NV_OP_XOR;
+   case TGSI_OPCODE_SHL: return NV_OP_SHL;
+   case TGSI_OPCODE_ISHR: return NV_OP_SAR;
+   case TGSI_OPCODE_USHR: return NV_OP_SHR;
+
+   /* comparisons */
+   case TGSI_OPCODE_SLT:
+   case TGSI_OPCODE_SGE:
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_SGT:
+   case TGSI_OPCODE_SLE:
+   case TGSI_OPCODE_SNE: return NV_OP_FSET_F32;
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_ISGE: return NV_OP_SET_S32;
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_USNE: return NV_OP_SET_U32;
+
+   /* texturing; TXP shares the plain TEX opcode */
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXP: return NV_OP_TEX;
+   case TGSI_OPCODE_TXB: return NV_OP_TXB;
+   case TGSI_OPCODE_TXL: return NV_OP_TXL;
+
+   default:
+      return NV_OP_NOP;
+   }
+}
+
+#if 0
+/* Disabled by the preprocessor: nothing in this block is compiled.
+ *
+ * infer_src_type() and infer_dst_type() classify a TGSI opcode by the
+ * NV_TYPE_* of its source and destination operands respectively, with
+ * NV_TYPE_F32 as the fallback for every opcode not listed.
+ */
+
+/* Operand type read by `opcode`: U32, S32, or F32 by default. */
+static ubyte
+infer_src_type(unsigned opcode)
+{
+   switch (opcode) {
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_XOR:
+   case TGSI_OPCODE_SAD:
+   case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_UADD:
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_UMAD:
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_UMAX:
+   case TGSI_OPCODE_UMIN:
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_USNE:
+   case TGSI_OPCODE_USHR:
+      return NV_TYPE_U32;
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_IDIV:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_INEG:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_ISLT:
+      return NV_TYPE_S32;
+   default:
+      return NV_TYPE_F32;
+   }
+}
+
+/* Result type written by `opcode`: U32, S32, or F32 by default. */
+static ubyte
+infer_dst_type(unsigned opcode)
+{
+   switch (opcode) {
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_F2U:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_XOR:
+   case TGSI_OPCODE_SAD:
+   case TGSI_OPCODE_UADD:
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_UMAD:
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_UMAX:
+   case TGSI_OPCODE_UMIN:
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_USNE:
+   case TGSI_OPCODE_USHR:
+      return NV_TYPE_U32;
+   case TGSI_OPCODE_F2I:
+   case TGSI_OPCODE_IDIV:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_INEG:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_ISLT:
+      return NV_TYPE_S32;
+   default:
+      return NV_TYPE_F32;
+   }
+}
+#endif
+
+/* Write one result channel of a TGSI instruction to its first destination.
+ *
+ * inst: instruction whose Dst[0] describes the destination register
+ * chan: destination channel, must be < 4
+ * res:  value to store
+ *
+ * The instruction's saturate mode is applied to res first.  OUTPUT
+ * destinations are recorded with STORE_OUTP for fragment programs and
+ * written with an EXPORT instruction otherwise; TEMPORARY destinations go
+ * through bld_lmem_store or STORE_TEMP depending on require_stores; ADDRESS
+ * destinations use STORE_ADDR.  Any other register file is ignored.
+ */
+static void
+emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
+           unsigned chan, struct nv_value *res)
+{
+   const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+   struct nv_instruction *nvi;
+   struct nv_value *mem;
+   struct nv_value *ptr = NULL;
+   int idx;
+
+   idx = reg->Register.Index;
+   assert(chan < 4);
+
+   /* indirectly addressed destination: fetch the address register value */
+   if (reg->Register.Indirect)
+      ptr = FETCH_ADDR(reg->Indirect.Index,
+                       tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));
+
+   switch (inst->Instruction.Saturate) {
+   case TGSI_SAT_NONE:
+      break;
+   case TGSI_SAT_ZERO_ONE:
+      res = bld_insn_1(bld, NV_OP_SAT, res);
+      break;
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      /* clamp to [-1, 1] */
+      res = bld_insn_2(bld, NV_OP_MAX_F32, res, bld_load_imm_f32(bld, -1.0f));
+      res = bld_insn_2(bld, NV_OP_MIN_F32, res, bld_load_imm_f32(bld, 1.0f));
+      break;
+   }
+
+   switch (reg->Register.File) {
+   case TGSI_FILE_OUTPUT:
+      /* values without a defining instruction get one via a MOV */
+      if (!res->insn)
+         res = bld_insn_1(bld, NV_OP_MOV, res);
+
+      if (bld->pc->is_fragprog) {
+         assert(!ptr);
+         STORE_OUTP(idx, chan, res);
+      } else {
+         nvi = new_instruction(bld->pc, NV_OP_EXPORT);
+         mem = new_value(bld->pc, bld->ti->output_file, res->reg.size);
+         nv_reference(bld->pc, nvi, 0, mem);
+         nv_reference(bld->pc, nvi, 1, res);
+         if (!ptr)
+            mem->reg.address = bld->ti->output_loc[idx][chan];
+         else
+            mem->reg.address = 0x80 + idx * 16 + chan * 4;
+         nvi->fixed = 1;
+      }
+      break;
+   case TGSI_FILE_TEMPORARY:
+      assert(idx < BLD_MAX_TEMPS);
+      /* The stored value has to be defined in the current block.  A value
+       * without a defining instruction, or one defined in another block,
+       * is copied with a MOV.  The previous code "checked" this with
+       * assert(bb = current_block), an assignment that re-parented the
+       * instruction in debug builds and did nothing with NDEBUG.
+       */
+      if (!res->insn || res->insn->bb != bld->pc->current_block)
+         res = bld_insn_1(bld, NV_OP_MOV, res);
+
+      assert(res->reg.file == NV_FILE_GPR);
+      assert(res->insn->bb == bld->pc->current_block);
+
+      if (bld->ti->require_stores)
+         bld_lmem_store(bld, ptr, idx * 4 + chan, res);
+      else
+         STORE_TEMP(idx, chan, res);
+      break;
+   case TGSI_FILE_ADDRESS:
+      assert(idx < BLD_MAX_ADDRS);
+      STORE_ADDR(idx, chan, res);
+      break;
+   }
+}
+
+/* Test the outputs_written bitmask, which holds 4 bits per output and
+ * therefore 8 outputs per 32-bit word.
+ *
+ * i: output index
+ * c: channel 0..3, or negative to test all four channels of output i
+ *
+ * Returns non-zero if the selected channel(s) are marked written.  The
+ * masks are built from unsigned literals so that shifting into bit 31
+ * (output 7 of each word) is well defined.
+ */
+static INLINE uint32_t
+bld_is_output_written(struct bld_context *bld, int i, int c)
+{
+   if (c < 0)
+      return bld->outputs_written[i / 8] & (0xfu << ((i * 4) % 32));
+   return bld->outputs_written[i / 8] & (1u << ((i * 4 + c) % 32));
+}
+
+/* Emit one fixed EXPORT per written fragment program output.
+ *
+ * For each output with at least one written channel, every written channel
+ * is fetched from bld->ovs, copied with a MOV whose result register id is
+ * set to the channel's output_loc, and the copies become the sources of
+ * the EXPORT instruction.
+ */
+static void
+bld_export_fp_outputs(struct bld_context *bld)
+{
+   struct nv_value *comps[4];
+   struct nv_instruction *exp;
+   int out, chan, count, k;
+
+   for (out = 0; out < PIPE_MAX_SHADER_OUTPUTS; ++out) {
+      if (!bld_is_output_written(bld, out, -1))
+         continue;
+
+      count = 0;
+      for (chan = 0; chan < 4; ++chan) {
+         struct nv_value *src, *copy;
+
+         if (!bld_is_output_written(bld, out, chan))
+            continue;
+
+         src = bld_fetch_global(bld, &bld->ovs[out][chan]);
+         assert(src);
+
+         copy = bld_insn_1(bld, NV_OP_MOV, src);
+         copy->reg.id = bld->ti->output_loc[out][chan];
+         comps[count++] = copy;
+      }
+      assert(count);
+
+      exp = new_instruction(bld->pc, NV_OP_EXPORT);
+      exp->fixed = 1;
+
+      for (k = 0; k < count; ++k)
+         nv_reference(bld->pc, exp, k, comps[k]);
+   }
+}
+
+/* Make b the current basic block and reset the per-block state: the first
+ * saved address of each of the 4 slots, every saved input value, and the
+ * outgoing edge kind (back to CFG_EDGE_FORWARD).
+ */
+static void
+bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
+{
+   int slot, input, chan;
+
+   bld->pc->current_block = b;
+
+   for (slot = 0; slot < 4; ++slot) {
+      bld->saved_addr[slot][0] = NULL;
+   }
+
+   for (input = 0; input < PIPE_MAX_SHADER_INPUTS; ++input) {
+      for (chan = 0; chan < 4; ++chan) {
+         bld->saved_inputs[input][chan] = NULL;
+      }
+   }
+
+   bld->out_kind = CFG_EDGE_FORWARD;
+}
+
+/* Return the value saved for channel c of input i in the current block,
+ * or NULL if none has been saved.
+ */
+static struct nv_value *
+bld_get_saved_input(struct bld_context *bld, unsigned i, unsigned c)
+{
+   struct nv_value *saved = bld->saved_inputs[i][c];
+
+   return saved ? saved : NULL;
+}
+
+/* Emit the interpolation of fragment input val.
+ *
+ * mode: NVC0_INTERP_* mode, optionally or'ed with NVC0_INTERP_CENTROID
+ *
+ * The input at address 0x3fc (gl_FrontFacing) is interpolated linearly
+ * and then turned from 0/~0 into -1.0/+1.0 with a shift and an xor.
+ * Perspective inputs use PINTERP with frag_coord[3]; everything else uses
+ * LINTERP.  The flat and centroid flags are set on the last instruction
+ * emitted.
+ */
+static struct nv_value *
+bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val)
+{
+   const unsigned centroid = mode & NVC0_INTERP_CENTROID;
+   const unsigned base_mode = mode & ~NVC0_INTERP_CENTROID;
+   struct nv_value *res;
+
+   if (val->reg.address == 0x3fc) {
+      /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */
+      res = bld_insn_1(bld, NV_OP_LINTERP, val);
+      res = bld_insn_2(bld, NV_OP_SHL, res, bld_imm_u32(bld, 31));
+      res = bld_insn_2(bld, NV_OP_XOR, res, bld_imm_f32(bld, -1.0f));
+   } else if (base_mode == NVC0_INTERP_PERSPECTIVE) {
+      res = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]);
+   } else {
+      res = bld_insn_1(bld, NV_OP_LINTERP, val);
+   }
+
+   if (base_mode == NVC0_INTERP_FLAT)
+      res->insn->flat = 1;
+   else
+      res->insn->flat = 0;
+
+   if (centroid)
+      res->insn->centroid = 1;
+   else
+      res->insn->centroid = 0;
+
+   return res;
+}
+
+/* Fetch one channel of a TGSI source operand as an nv_value.
+ *
+ * insn: TGSI instruction being translated
+ * s:    index of the source operand in insn->Src
+ * chan: destination channel; the operand's swizzle selects which source
+ *       component is actually read
+ *
+ * Returns the fetched value with the operand's sign mode (abs / negate)
+ * applied through float ABS/NEG instructions.  An unhandled register file
+ * or sign mode aborts.
+ */
+static struct nv_value *
+emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
+           const unsigned s, const unsigned chan)
+{
+   const struct tgsi_full_src_register *src = &insn->Src[s];
+   struct nv_value *res = NULL;
+   struct nv_value *ptr = NULL;
+   int idx, ind_idx, dim_idx;
+   unsigned swz, ind_swz, sgn;
+
+   idx = src->Register.Index;
+   swz = tgsi_util_get_full_src_register_swizzle(src, chan);
+
+   /* indirect addressing: fetch the address register component used */
+   if (src->Register.Indirect) {
+      ind_idx = src->Indirect.Index;
+      ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
+
+      ptr = FETCH_ADDR(ind_idx, ind_swz);
+   }
+
+   /* second dimension (used below to select the constant buffer) */
+   if (src->Register.Dimension)
+      dim_idx = src->Dimension.Index;
+   else
+      dim_idx = 0;
+
+   switch (src->Register.File) {
+   case TGSI_FILE_CONSTANT:
+      assert(dim_idx < 14);
+      /* 16 bytes per vec4 register, 4 bytes per component */
+      res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), 4);
+      res->reg.address = idx * 16 + swz * 4;
+      res = bld_insn_1(bld, NV_OP_LD, res);
+      if (ptr)
+         bld_src_pointer(bld, res->insn, 1, ptr);
+      break;
+   case TGSI_FILE_IMMEDIATE: /* XXX: type for MOV TEMP[0], -IMM[0] */
+      assert(idx < bld->ti->immd32_nr);
+      res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);
+      break;
+   case TGSI_FILE_INPUT:
+      assert(!src->Register.Dimension);
+      /* directly addressed inputs are fetched once per block and reused */
+      if (!ptr) {
+         res = bld_get_saved_input(bld, idx, swz);
+         if (res)
+            return res;
+      }
+      res = new_value(bld->pc, bld->ti->input_file, 4);
+      if (ptr)
+         res->reg.address = 0x80 + idx * 16 + swz * 4;
+      else
+         res->reg.address = bld->ti->input_loc[idx][swz];
+
+      if (bld->pc->is_fragprog)
+         res = bld_interp(bld, bld->ti->interp_mode[idx], res);
+      else
+         res = bld_insn_1(bld, NV_OP_VFETCH, res);
+
+      /* the pointer goes into the first source slot after those in use */
+      if (ptr)
+         bld_src_pointer(bld, res->insn, res->insn->src[1] ? 2 : 1, ptr);
+      else
+         bld->saved_inputs[idx][swz] = res;
+      break;
+   case TGSI_FILE_TEMPORARY:
+      if (bld->ti->require_stores)
+         res = bld_lmem_load(bld, ptr, idx * 4 + swz);
+      else
+         res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
+      break;
+   case TGSI_FILE_ADDRESS:
+      res = bld_fetch_global(bld, &bld->avs[idx][swz]);
+      break;
+   case TGSI_FILE_PREDICATE:
+      res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
+      break;
+   default:
+      NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
+      abort();
+      break;      
+   }
+   /* nothing fetched: hand back an undefined GPR value */
+   if (!res)
+      return bld_undef(bld, NV_FILE_GPR);
+
+   sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
+
+   switch (sgn) {
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+   case TGSI_UTIL_SIGN_CLEAR:
+      res = bld_insn_1(bld, NV_OP_ABS_F32, res);
+      break;
+   case TGSI_UTIL_SIGN_TOGGLE:
+      res = bld_insn_1(bld, NV_OP_NEG_F32, res);
+      break;
+   case TGSI_UTIL_SIGN_SET:
+      /* -|x| */
+      res = bld_insn_1(bld, NV_OP_ABS_F32, res);
+      res = bld_insn_1(bld, NV_OP_NEG_F32, res);
+      break;
+   default:
+      NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
+      abort();
+      break;
+   }
+
+   return res;
+}
+
+/* Translate TGSI LIT into dst0[0..3], honouring the write mask.
+ *
+ * x and w receive the constant 1.0 (both slots are filled if either is
+ * written).  y receives max(src.x, 0).  z receives
+ * pow(max(src.y, 0), clamp(src.w, -128, 128)), replaced by zero through a
+ * predicated MOV and a SELECT when max(src.x, 0) <= 0.
+ */
+static void
+bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
+        const struct tgsi_full_instruction *insn)
+{
+   const unsigned wrmask = insn->Dst[0].Register.WriteMask;
+   struct nv_value *diffuse = NULL;
+   struct nv_value *upper, *lower;
+   struct nv_value *src_y, *src_w;
+   struct nv_value *is_unlit, *base, *expo, *spec, *zero_if_unlit;
+
+   if (wrmask & 0x9) {
+      struct nv_value *one = bld_load_imm_f32(bld, 1.0f);
+
+      dst0[0] = one;
+      dst0[3] = one;
+   }
+
+   if (wrmask & 0x6) {
+      diffuse = bld_insn_2(bld, NV_OP_MAX,
+                           emit_fetch(bld, insn, 0, 0), bld->zero);
+      if (wrmask & 0x2)
+         dst0[1] = diffuse;
+   }
+
+   if (!(wrmask & 0x4))
+      return;
+
+   upper = bld_load_imm_f32(bld, 127.999999f);
+   lower = bld_load_imm_f32(bld, -127.999999f);
+
+   src_y = emit_fetch(bld, insn, 0, 1);
+   src_w = emit_fetch(bld, insn, 0, 3);
+
+   is_unlit = bld_setp(bld, NV_OP_SET_F32, NV_CC_LE, diffuse, bld->zero);
+
+   base = bld_insn_2(bld, NV_OP_MAX_F32, src_y, bld->zero);
+   expo = bld_insn_2(bld, NV_OP_MAX_F32, src_w, lower);
+   expo = bld_insn_2(bld, NV_OP_MIN_F32, expo, upper);
+   spec = bld_pow(bld, base, expo);
+
+   zero_if_unlit = bld_insn_1(bld, NV_OP_MOV, bld->zero);
+   bld_src_predicate(bld, zero_if_unlit->insn, 1, is_unlit);
+
+   dst0[2] = bld_insn_2(bld, NV_OP_SELECT, spec, zero_if_unlit);
+}
+
+/* Report the coordinate layout of a texture instruction's target.
+ *
+ * *dim receives the number of coordinate components and *arg the number
+ * of arguments, which is one more than *dim for shadow targets.  An
+ * unknown target asserts and leaves both outputs untouched.
+ */
+static INLINE void
+get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
+{
+   int coords;
+   int shadow = 0;
+
+   switch (insn->Texture.Texture) {
+   case TGSI_TEXTURE_SHADOW1D:
+      shadow = 1;
+      /* fallthrough */
+   case TGSI_TEXTURE_1D:
+      coords = 1;
+      break;
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+      shadow = 1;
+      /* fallthrough */
+   case TGSI_TEXTURE_UNKNOWN:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+      coords = 2;
+      break;
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+      coords = 3;
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   *dim = coords;
+   *arg = coords + shadow;
+}
+
+/* Duplicate instruction nvi and return the first result of the copy.
+ *
+ * The copy gets fresh result values (created with new_value_like) and its
+ * own references to the same source values as nvi.
+ */
+static struct nv_value *
+bld_clone(struct bld_context *bld, struct nv_instruction *nvi)
+{
+   struct nv_instruction *dupi = new_instruction(bld->pc, nvi->opcode);
+   struct nv_instruction *next, *prev;
+   int c;
+
+   /* remember the new instruction's list links across the struct copy */
+   next = dupi->next;
+   prev = dupi->prev;
+
+   *dupi = *nvi;
+
+   dupi->next = next;
+   dupi->prev = prev;
+
+   /* replace the copied result pointers with new values */
+   for (c = 0; c < 5 && nvi->def[c]; ++c)
+      bld_def(dupi, c, new_value_like(bld->pc, nvi->def[c]));
+
+   /* drop the copied source refs and reference the source values anew */
+   for (c = 0; c < 6 && nvi->src[c]; ++c) {
+      dupi->src[c] = NULL;
+      nv_reference(bld->pc, dupi, c, nvi->src[c]->value);
+   }
+
+   return dupi->def[0];
+}
+
+/* Load the coordinates of a projective texture lookup, divided by src.w.
+ *
+ * t:   receives the coordinates in t[0..dim-1] and, when arg != dim, the
+ *      depth comparison value in t[2]; t[3] is used as scratch for 1/w
+ * dim: number of coordinate components
+ * arg: number of arguments (dim + 1 for shadow targets)
+ *
+ * Components produced by a PINTERP are not multiplied; the interpolation
+ * is cloned with 1/w as its second source, where w itself is taken from
+ * a LINTERP clone when it was perspective-interpolated.  All remaining
+ * components are multiplied by a freshly computed 1/w.
+ *
+ * Values returned by emit_fetch need not have a defining instruction, so
+ * ->insn is checked before its opcode is inspected.
+ *
+ * NOTE: proj(t0) = (t0 / w) / (tc3 / w) = tc0 / tc2 handled by optimizer
+ */
+static void
+load_proj_tex_coords(struct bld_context *bld,
+                     struct nv_value *t[4], int dim, int arg,
+                     const struct tgsi_full_instruction *insn)
+{
+   int c;
+   unsigned mask = (1 << dim) - 1;
+
+   if (arg != dim)
+      mask |= 4; /* depth comparison value */
+
+   t[3] = emit_fetch(bld, insn, 0, 3);
+   if (t[3]->insn && t[3]->insn->opcode == NV_OP_PINTERP) {
+      /* undo the perspective division on w itself */
+      t[3] = bld_clone(bld, t[3]->insn);
+      t[3]->insn->opcode = NV_OP_LINTERP;
+      nv_reference(bld->pc, t[3]->insn, 1, NULL);
+   }
+   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
+
+   for (c = 0; c < 4; ++c) {
+      if (!(mask & (1 << c)))
+         continue;
+      t[c] = emit_fetch(bld, insn, 0, c);
+
+      if (!t[c]->insn || t[c]->insn->opcode != NV_OP_PINTERP)
+         continue;
+      mask &= ~(1 << c); /* handled by interpolation, no multiply needed */
+
+      t[c] = bld_clone(bld, t[c]->insn);
+      nv_reference(bld->pc, t[c]->insn, 1, t[3]);
+   }
+   if (mask == 0)
+      return;
+
+   /* remaining components: multiply by 1/w of the unmodified w */
+   t[3] = emit_fetch(bld, insn, 0, 3);
+   t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
+
+   for (c = 0; c < 4; ++c)
+      if (mask & (1 << c))
+         t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], t[3]);
+}
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ *
+ * The QOP_* values below are 2-bit operation selectors.  QOP(a, b, c, d)
+ * packs four of them into one byte, with a in bits 0-1, b in bits 2-3,
+ * c in bits 4-5 and d in bits 6-7; the arguments are the suffixes of the
+ * QOP_* names (e.g. QOP(ADD, SUB, ADD, SUB)).
+ */
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV1 3
+
+#define QOP(a, b, c, d) \
+   ((QOP_##a << 0) | (QOP_##b << 2) | (QOP_##c << 4) | (QOP_##d << 6))
+
+/* Emit a QUADOP on src0 and src1.
+ *
+ * qop:  packed per-pixel operations, see QOP()
+ * lane: stored in the instruction's lanes field
+ * wp:   predicate write request; not implemented, asserts if set
+ */
+static INLINE struct nv_value *
+bld_quadop(struct bld_context *bld, ubyte qop, struct nv_value *src0, int lane,
+           struct nv_value *src1, boolean wp)
+{
+   struct nv_value *result = bld_insn_2(bld, NV_OP_QUADOP, src0, src1);
+   struct nv_instruction *quad = result->insn;
+
+   quad->lanes = lane;
+   quad->quadop = qop;
+
+   if (wp) {
+      assert(!"quadop predicate write");
+   }
+
+   return result;
+}
+
+/* Emit a texture instruction.
+ *
+ * opcode: texture opcode to emit
+ * dst:    receives the four new GPR result values
+ * t_in:   the argc argument values
+ * tic:    stored in ext.tex.t
+ * tsc:    stored in ext.tex.s
+ * cube:   stored in tex_cube
+ *
+ * Each argument is first copied with a fixed MOV, because the inputs to
+ * a tex instruction must be separate values.  Returns the instruction.
+ */
+static struct nv_instruction *
+emit_tex(struct bld_context *bld, uint opcode,
+         struct nv_value *dst[4], struct nv_value *t_in[4],
+         int argc, int tic, int tsc, int cube)
+{
+   struct nv_value *args[4];
+   struct nv_instruction *tex;
+   int i;
+
+   /* the inputs to a tex instruction must be separate values */
+   for (i = 0; i < argc; ++i) {
+      struct nv_value *copy = bld_insn_1(bld, NV_OP_MOV, t_in[i]);
+
+      copy->insn->fixed = 1;
+      args[i] = copy;
+   }
+
+   tex = new_instruction(bld->pc, opcode);
+
+   for (i = 0; i < 4; ++i) {
+      struct nv_value *result = new_value(bld->pc, NV_FILE_GPR, 4);
+
+      dst[i] = bld_def(tex, i, result);
+   }
+
+   for (i = 0; i < argc; ++i) {
+      nv_reference(bld->pc, tex, i, args[i]);
+   }
+
+   tex->tex_argc = argc;
+   tex->tex_cube = cube;
+   tex->tex_mask = 0xf;
+   tex->tex_live = 0;
+   tex->ext.tex.t = tic;
+   tex->ext.tex.s = tsc;
+
+   return tex;
+}
+
+/*
+static boolean
+bld_is_constant(struct nv_value *val)
+{
+   if (val->reg.file == NV_FILE_IMM)
+      return TRUE;
+   return val->insn && nvCG_find_constant(val->insn->src[0]);
+}
+*/
+
+/* Translate a TGSI texture instruction (TEX, TXP, TXB, TXL).
+ *
+ * dst0: receives the four result values
+ * insn: TGSI instruction; Src[0] holds the coordinates and Src[1].Index
+ *       selects the texture, which is used for both tic and tsc
+ *
+ * Arguments are laid out as t[0..dim-1] = coordinates, followed by the
+ * depth comparison value for shadow targets, followed by src.w (bias or
+ * lod) for TXB/TXL.  Cube coordinates are scaled by the reciprocal of the
+ * largest absolute component first.
+ */
+static void
+bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
+        const struct tgsi_full_instruction *insn)
+{
+   struct nv_value *t[4], *s[3];
+   uint opcode = translate_opcode(insn->Instruction.Opcode);
+   int arg, dim, c;
+   const int tic = insn->Src[1].Register.Index;
+   const int tsc = tic;
+   const int cube = (insn->Texture.Texture  == TGSI_TEXTURE_CUBE) ? 1 : 0;
+
+   get_tex_dim(insn, &dim, &arg);
+
+   if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
+      load_proj_tex_coords(bld, t, dim, arg, insn);
+      /* load_proj_tex_coords always leaves the depth comparison value in
+       * t[2], but emit_tex consumes t[0..arg-1].  Move it to t[dim] so
+       * that 1D shadow lookups do not read an uninitialized t[1]; for
+       * dim == 2 this is a no-op.
+       */
+      if (arg != dim)
+         t[dim] = t[2];
+   } else {
+      for (c = 0; c < dim; ++c)
+         t[c] = emit_fetch(bld, insn, 0, c);
+      if (arg != dim)
+         t[dim] = emit_fetch(bld, insn, 0, 2); /* depth comparison value */
+   }
+
+   if (cube) {
+      /* scale by 1 / max(|x|, |y|, |z|) */
+      assert(dim >= 3);
+      for (c = 0; c < 3; ++c)
+         s[c] = bld_insn_1(bld, NV_OP_ABS_F32, t[c]);
+
+      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[1]);
+      s[0] = bld_insn_2(bld, NV_OP_MAX_F32, s[0], s[2]);
+      s[0] = bld_insn_1(bld, NV_OP_RCP, s[0]);
+
+      for (c = 0; c < 3; ++c)
+         t[c] = bld_insn_2(bld, NV_OP_MUL_F32, t[c], s[0]);
+   }
+
+   /* bias / lod is passed as the last argument */
+   if (opcode == NV_OP_TXB || opcode == NV_OP_TXL)
+      t[arg++] = emit_fetch(bld, insn, 0, 3);
+   emit_tex(bld, opcode, dst0, t, arg, tic, tsc, cube);
+}
+
+/* Emit the dot product of the first n channels of Src[0] and Src[1].
+ *
+ * Channel 0 is multiplied with MUL_F32; each further channel is
+ * accumulated with MAD_F32.  Returns the final accumulator value.
+ */
+static INLINE struct nv_value *
+bld_dot(struct bld_context *bld, const struct tgsi_full_instruction *insn,
+        int n)
+{
+   struct nv_value *acc, *lhs, *rhs;
+   int chan = 0;
+
+   lhs = emit_fetch(bld, insn, 0, chan);
+   rhs = emit_fetch(bld, insn, 1, chan);
+   acc = bld_insn_2(bld, NV_OP_MUL_F32, lhs, rhs);
+
+   while (++chan < n) {
+      lhs = emit_fetch(bld, insn, 0, chan);
+      rhs = emit_fetch(bld, insn, 1, chan);
+      acc = bld_insn_3(bld, NV_OP_MAD_F32, lhs, rhs, acc);
+   }
+
+   return acc;
+}
+
+/* Run the following statement once for every channel (0..3) enabled in the
+ * write mask of inst's first destination; chan must be an assignable
+ * integer variable.
+ *
+ * The expansion ends in an unbraced `if`, so an `else` placed directly
+ * after the controlled statement would bind to that `if`.
+ */
+#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
+   for (chan = 0; chan < 4; ++chan)               \
+      if ((inst)->Dst[0].Register.WriteMask & (1 << (chan)))
+
+static void
+bld_instruction(struct bld_context *bld,
+                const struct tgsi_full_instruction *insn)
+{
+   struct nv_value *src0;
+   struct nv_value *src1;
+   struct nv_value *src2;
+   struct nv_value *dst0[4] = { NULL };
+   struct nv_value *temp;
+   int c;
+   uint opcode = translate_opcode(insn->Instruction.Opcode);
+   uint8_t mask = insn->Dst[0].Register.WriteMask;
+
+#ifdef NOUVEAU_DEBUG_BITS
+   debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1);
+#endif
+       
+   switch (insn->Instruction.Opcode) {
+   case TGSI_OPCODE_ADD:
+   case TGSI_OPCODE_MAX:
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_MUL:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         src1 = emit_fetch(bld, insn, 1, c);
+         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
+      }
+      break;
+   case TGSI_OPCODE_ARL:
+      src1 = bld_imm_u32(bld, 4);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         src0 = bld_insn_1(bld, NV_OP_FLOOR, src0);
+         src0->insn->ext.cvt.d = NV_TYPE_S32;
+         src0->insn->ext.cvt.s = NV_TYPE_F32;
+         dst0[c] = bld_insn_2(bld, NV_OP_SHL, src0, src1);
+      }
+      break;
+   case TGSI_OPCODE_CMP:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         src0 = bld_setp(bld, NV_OP_SET_F32, NV_CC_LT, src0, bld->zero);
+         src1 = emit_fetch(bld, insn, 1, c);
+         src2 = emit_fetch(bld, insn, 2, c);
+         dst0[c] = bld_insn_3(bld, NV_OP_SELP, src1, src2, src0);
+      }
+      break;
+   case TGSI_OPCODE_COS:
+   case TGSI_OPCODE_SIN:
+      src0 = emit_fetch(bld, insn, 0, 0);
+      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+      if (insn->Dst[0].Register.WriteMask & 7)
+         temp = bld_insn_1(bld, opcode, temp);
+      for (c = 0; c < 3; ++c)
+         if (insn->Dst[0].Register.WriteMask & (1 << c))
+            dst0[c] = temp;
+      if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
+         break;
+      src0 = emit_fetch(bld, insn, 0, 3);
+      temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+      dst0[3] = bld_insn_1(bld, opcode, temp);
+      break;
+   case TGSI_OPCODE_DP2:
+      temp = bld_dot(bld, insn, 2);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = temp;
+      break;
+   case TGSI_OPCODE_DP3:
+      temp = bld_dot(bld, insn, 3);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = temp;
+      break;
+   case TGSI_OPCODE_DP4:
+      temp = bld_dot(bld, insn, 4);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = temp;
+      break;
+   case TGSI_OPCODE_DPH:
+      src0 = bld_dot(bld, insn, 3);
+      src1 = emit_fetch(bld, insn, 1, 3);
+      temp = bld_insn_2(bld, NV_OP_ADD_F32, src0, src1);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = temp;
+      break;
+   case TGSI_OPCODE_DST:
+      if (insn->Dst[0].Register.WriteMask & 1)
+         dst0[0] = bld_imm_f32(bld, 1.0f);
+      if (insn->Dst[0].Register.WriteMask & 2) {
+         src0 = emit_fetch(bld, insn, 0, 1);
+         src1 = emit_fetch(bld, insn, 1, 1);
+         dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+      }
+      if (insn->Dst[0].Register.WriteMask & 4)
+         dst0[2] = emit_fetch(bld, insn, 0, 2);
+      if (insn->Dst[0].Register.WriteMask & 8)
+         dst0[3] = emit_fetch(bld, insn, 1, 3);
+      break;
+   case TGSI_OPCODE_EXP:
+      src0 = emit_fetch(bld, insn, 0, 0);
+      temp = bld_insn_1(bld, NV_OP_FLOOR, src0);
+
+      if (insn->Dst[0].Register.WriteMask & 2)
+         dst0[1] = bld_insn_2(bld, NV_OP_SUB_F32, src0, temp);
+      if (insn->Dst[0].Register.WriteMask & 1) {
+         temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
+         dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp);
+      }
+      if (insn->Dst[0].Register.WriteMask & 4) {
+         temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
+         dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp);
+      }
+      if (insn->Dst[0].Register.WriteMask & 8)
+         dst0[3] = bld_imm_f32(bld, 1.0f);
+      break;
+   case TGSI_OPCODE_EX2:
+      src0 = emit_fetch(bld, insn, 0, 0);
+      temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
+      temp = bld_insn_1(bld, NV_OP_EX2, temp);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = temp;
+      break;
+   case TGSI_OPCODE_FRC:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
+         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, dst0[c]);
+      }
+      break;
+   case TGSI_OPCODE_KIL:
+      for (c = 0; c < 4; ++c)
+         bld_kil(bld, emit_fetch(bld, insn, 0, c));
+      break;
+   case TGSI_OPCODE_KILP:
+      (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
+      break;
+   case TGSI_OPCODE_IF:
+   {
+      struct nv_basic_block *b = new_basic_block(bld->pc);
+
+      assert(bld->cond_lvl < BLD_MAX_COND_NESTING);
+
+      nvc0_bblock_attach(bld->pc->current_block, b, CFG_EDGE_FORWARD);
+
+      bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
+      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
+
+      src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE,
+                      emit_fetch(bld, insn, 0, 0), bld->zero);
+
+      bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
+
+      ++bld->cond_lvl;
+      bld_new_block(bld, b);
+   }
+      break;
+   case TGSI_OPCODE_ELSE:
+   {
+      struct nv_basic_block *b = new_basic_block(bld->pc);
+
+      --bld->cond_lvl;
+      nvc0_bblock_attach(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
+
+      bld->cond_bb[bld->cond_lvl]->exit->target = b;
+      bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
+
+      new_instruction(bld->pc, NV_OP_BRA)->terminator = 1;
+
+      ++bld->cond_lvl;
+      bld_new_block(bld, b);
+   }
+      break;
+   case TGSI_OPCODE_ENDIF:
+   {
+      struct nv_basic_block *b = new_basic_block(bld->pc);
+
+      --bld->cond_lvl;
+      nvc0_bblock_attach(bld->pc->current_block, b, bld->out_kind);
+      nvc0_bblock_attach(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
+
+      bld->cond_bb[bld->cond_lvl]->exit->target = b;
+
+      bld_new_block(bld, b);
+
+      if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
+         bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
+         new_instruction(bld->pc, NV_OP_JOIN)->join = 1;
+      }
+   }
+      break;
+   case TGSI_OPCODE_BGNLOOP:
+   {
+      struct nv_basic_block *bl = new_basic_block(bld->pc);
+      struct nv_basic_block *bb = new_basic_block(bld->pc);
+
+      assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING);
+
+      bld->loop_bb[bld->loop_lvl] = bl;
+      bld->brkt_bb[bld->loop_lvl] = bb;
+
+      nvc0_bblock_attach(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);
+
+      bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);
+
+      if (bld->loop_lvl == bld->pc->loop_nesting_bound)
+         bld->pc->loop_nesting_bound++;
+
+      bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
+      bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
+      bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
+   }
+      break;
+   case TGSI_OPCODE_BRK:
+   {
+      struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
+
+      bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+
+      if (bld->out_kind == CFG_EDGE_FORWARD) /* else we already had BRK/CONT */
+         nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
+
+      bld->out_kind = CFG_EDGE_FAKE;
+   }
+      break;
+   case TGSI_OPCODE_CONT:
+   {
+      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
+
+      bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+
+      nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+
+      if ((bb = bld->join_bb[bld->cond_lvl - 1])) {
+         bld->join_bb[bld->cond_lvl - 1] = NULL;
+         nvc0_insn_delete(bb->exit->prev);
+      }
+      bld->out_kind = CFG_EDGE_FAKE;
+   }
+      break;
+   case TGSI_OPCODE_ENDLOOP:
+   {
+      struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
+
+      bld_flow(bld, NV_OP_BRA, NULL, bb, FALSE);
+
+      nvc0_bblock_attach(bld->pc->current_block, bb, CFG_EDGE_BACK);
+
+      bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
+
+      bld_new_block(bld, bld->brkt_bb[--bld->loop_lvl]);
+   }
+      break;
+   case TGSI_OPCODE_ABS:
+   case TGSI_OPCODE_CEIL:
+   case TGSI_OPCODE_FLR:
+   case TGSI_OPCODE_TRUNC:
+   case TGSI_OPCODE_DDX:
+   case TGSI_OPCODE_DDY:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         dst0[c] = bld_insn_1(bld, opcode, src0);
+      }           
+      break;
+   case TGSI_OPCODE_LIT:
+      bld_lit(bld, dst0, insn);
+      break;
+   case TGSI_OPCODE_LRP:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         src1 = emit_fetch(bld, insn, 1, c);
+         src2 = emit_fetch(bld, insn, 2, c);
+         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src1, src2);
+         dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, dst0[c], src0, src2);
+      }
+      break;
+   case TGSI_OPCODE_MOV:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = emit_fetch(bld, insn, 0, c);
+      break;
+   case TGSI_OPCODE_MAD:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         src1 = emit_fetch(bld, insn, 1, c);
+         src2 = emit_fetch(bld, insn, 2, c);
+         dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
+      }
+      break;
+   case TGSI_OPCODE_POW:
+      src0 = emit_fetch(bld, insn, 0, 0);
+      src1 = emit_fetch(bld, insn, 1, 0);
+      temp = bld_pow(bld, src0, src1);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = temp;
+      break;
+   case TGSI_OPCODE_LOG:
+      src0 = emit_fetch(bld, insn, 0, 0);
+      src0 = bld_insn_1(bld, NV_OP_ABS_F32, src0);
+      temp = bld_insn_1(bld, NV_OP_LG2, src0);
+      dst0[2] = temp;
+      if (insn->Dst[0].Register.WriteMask & 3) {
+         temp = bld_insn_1(bld, NV_OP_FLOOR, temp);
+         dst0[0] = temp;
+      }
+      if (insn->Dst[0].Register.WriteMask & 2) {
+         temp = bld_insn_1(bld, NV_OP_PREEX2, temp);
+         temp = bld_insn_1(bld, NV_OP_EX2, temp);
+         temp = bld_insn_1(bld, NV_OP_RCP, temp);
+         dst0[1] = bld_insn_2(bld, NV_OP_MUL_F32, src0, temp);
+      }
+      if (insn->Dst[0].Register.WriteMask & 8)
+         dst0[3] = bld_imm_f32(bld, 1.0f);
+      break;
+   case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_LG2:
+      src0 = emit_fetch(bld, insn, 0, 0);
+      temp = bld_insn_1(bld, opcode, src0);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = temp;
+      break;
+   case TGSI_OPCODE_RSQ:
+      src0 = emit_fetch(bld, insn, 0, 0);
+      temp = bld_insn_1(bld, NV_OP_ABS_F32, src0);
+      temp = bld_insn_1(bld, NV_OP_RSQ, temp);
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+         dst0[c] = temp;
+      break;
+   case TGSI_OPCODE_SLT:
+   case TGSI_OPCODE_SGE:
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_SGT:
+   case TGSI_OPCODE_SLE:
+   case TGSI_OPCODE_SNE:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_USNE:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         src1 = emit_fetch(bld, insn, 1, c);
+         dst0[c] = bld_insn_2(bld, opcode, src0, src1);
+         dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
+      }
+      break;
+   case TGSI_OPCODE_SCS:
+      if (insn->Dst[0].Register.WriteMask & 0x3) {
+         src0 = emit_fetch(bld, insn, 0, 0);
+         temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+         if (insn->Dst[0].Register.WriteMask & 0x1)
+            dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
+         if (insn->Dst[0].Register.WriteMask & 0x2)
+            dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
+      }
+      if (insn->Dst[0].Register.WriteMask & 0x4)
+         dst0[2] = bld_imm_f32(bld, 0.0f);
+      if (insn->Dst[0].Register.WriteMask & 0x8)
+         dst0[3] = bld_imm_f32(bld, 1.0f);
+      break;
+   case TGSI_OPCODE_SSG:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) { /* XXX: set lt, set gt, sub */
+         src0 = emit_fetch(bld, insn, 0, c);
+         src1 = bld_setp(bld, NV_OP_SET_F32, NV_CC_EQ, src0, bld->zero);
+         temp = bld_insn_2(bld, NV_OP_AND, src0, bld_imm_u32(bld, 0x80000000));
+         temp = bld_insn_2(bld, NV_OP_OR,  temp, bld_imm_f32(bld, 1.0f));
+         dst0[c] = bld_insn_1(bld, NV_OP_MOV, temp);
+         bld_src_predicate(bld, dst0[c]->insn, 1, src1);
+      }
+      break;
+   case TGSI_OPCODE_SUB:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         src0 = emit_fetch(bld, insn, 0, c);
+         src1 = emit_fetch(bld, insn, 1, c);
+         dst0[c] = bld_insn_2(bld, NV_OP_SUB_F32, src0, src1);
+      }
+      break;
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXP:
+      bld_tex(bld, dst0, insn);
+      break;
+   case TGSI_OPCODE_XPD:
+      FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+         if (c == 3) {
+            dst0[3] = bld_imm_f32(bld, 1.0f);
+            break;
+         }
+         src0 = emit_fetch(bld, insn, 1, (c + 1) % 3);
+         src1 = emit_fetch(bld, insn, 0, (c + 2) % 3);
+         dst0[c] = bld_insn_2(bld, NV_OP_MUL_F32, src0, src1);
+
+         src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
+         src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
+         dst0[c] = bld_insn_3(bld, NV_OP_MAD_F32, src0, src1, dst0[c]);
+
+         dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
+      }
+      break;
+   case TGSI_OPCODE_RET:
+      (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
+      break;
+   case TGSI_OPCODE_END:
+      /* VP outputs are exported in-place as scalars, optimization later */
+      if (bld->pc->is_fragprog)
+         bld_export_fp_outputs(bld);
+      break;
+   default:
+      NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
+      abort();
+      break;
+   }
+
+   if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+       !bld->pc->is_fragprog) {
+      struct nv_instruction *mi = NULL;
+      uint size;
+
+      for (c = 0; c < 4; ++c)
+         if ((mask & (1 << c)) &&
+             ((dst0[c]->reg.file == NV_FILE_IMM) ||
+              (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR)))
+            dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]);
+
+      c = 0;
+      if ((mask & 0x3) == 0x3) {
+         mask &= ~0x3;
+         size = 8;
+         mi = bld_insn_2(bld, NV_OP_BIND, dst0[0], dst0[1])->insn;
+      }
+      if ((mask & 0xc) == 0xc) {
+         mask &= ~0xc;
+         if (mi) {
+            size = 16;
+            nv_reference(bld->pc, mi, 2, dst0[2]);
+            nv_reference(bld->pc, mi, 3, dst0[3]);
+         } else {
+            c = 2;
+            size = 8;
+            mi = bld_insn_2(bld, NV_OP_BIND, dst0[2], dst0[3])->insn;
+         }
+      } else
+      if (mi && (mask & 0x4)) {
+         size = 12;
+         mask &= ~0x4;
+         nv_reference(bld->pc, mi, 2, dst0[2]);
+      }
+
+      if (mi) {
+         struct nv_instruction *ex = new_instruction(bld->pc, NV_OP_EXPORT);
+         int s;
+
+         nv_reference(bld->pc, ex, 0, new_value(bld->pc, NV_FILE_MEM_V, 4));
+         nv_reference(bld->pc, ex, 1, mi->def[0]);
+
+         for (s = 1; s < size / 4; ++s) {
+            bld_def(mi, s, new_value(bld->pc, NV_FILE_GPR, 4));
+            nv_reference(bld->pc, ex, s + 1, mi->def[s]);
+         }
+
+         ex->fixed = 1;
+         ex->src[0]->value->reg.size = size;
+         ex->src[0]->value->reg.address =
+            bld->ti->output_loc[insn->Dst[0].Register.Index][c];
+      }
+   }
+
+   for (c = 0; c < 4; ++c)
+      if (mask & (1 << c))
+         emit_store(bld, insn, c, dst0[c]);
+}
+
+/* Release the dynamic value arrays of n registers starting at base.
+ * Every register occupies four consecutive channel entries.
+ */
+static INLINE void
+bld_free_registers(struct bld_register *base, int n)
+{
+   struct bld_register *reg = base;
+   int left;
+
+   /* n registers * 4 channels, visited in ascending order */
+   for (left = n * 4; left > 0; --left, ++reg)
+      util_dynarray_fini(&reg->vals);
+}
+
+/* Translate the TGSI instructions recorded in ti into the IR owned by pc.
+ *
+ * Creates the root basic block, sets up the builder context (including the
+ * value bound to GPR id 63 and, for fragment programs, the reciprocal of the
+ * interpolated input at address 0x7c), emits every instruction and finally
+ * releases the builder's register tracking arrays.
+ *
+ * Returns 0 on success and non-zero if the builder context could not be
+ * allocated.
+ */
+int
+nvc0_tgsi_to_nc(struct nv_pc *pc, struct nvc0_translation_info *ti)
+{
+   struct bld_context *bld = CALLOC_STRUCT(bld_context);
+   unsigned ip;
+
+   /* bail out before touching pc so a failed allocation leaves it unchanged */
+   if (!bld)
+      return 1;
+
+   pc->root[0] = pc->current_block = new_basic_block(pc);
+
+   bld->pc = pc;
+   bld->ti = ti;
+
+   pc->loop_nesting_bound = 1;
+
+   bld->zero = new_value(pc, NV_FILE_GPR, 4);
+   bld->zero->reg.id = 63;
+
+   if (pc->is_fragprog) {
+      struct nv_value *mem = new_value(pc, NV_FILE_MEM_V, 4);
+      mem->reg.address = 0x7c;
+
+      /* frag_coord[3] = 1 / interpolated value read from 0x7c */
+      bld->frag_coord[3] = bld_insn_1(bld, NV_OP_LINTERP, mem);
+      bld->frag_coord[3] = bld_insn_1(bld, NV_OP_RCP, bld->frag_coord[3]);
+   }
+
+   for (ip = 0; ip < ti->num_insns; ++ip)
+      bld_instruction(bld, &ti->insns[ip]);
+
+   bld_free_registers(&bld->tvs[0][0], BLD_MAX_TEMPS);
+   bld_free_registers(&bld->avs[0][0], BLD_MAX_ADDRS);
+   bld_free_registers(&bld->pvs[0][0], BLD_MAX_PREDS);
+   bld_free_registers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
+
+   FREE(bld);
+   return 0;
+}
+
+/* If a variable is assigned in a loop, references to the old value from
+ * outside the loop have to be replaced with a phi value.
+ *
+ * Rewrites every source that refers to old_val so that it refers to new_val,
+ * in block b and, recursively, in each successor that has not yet been
+ * stamped with the current pass sequence number.
+ */
+static void
+bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
+                  struct nv_value *old_val,
+                  struct nv_value *new_val)
+{
+   struct nv_instruction *insn = b->phi ? b->phi : b->entry;
+   int i;
+
+   for (; insn; insn = insn->next) {
+      int s = 0;
+
+      while (s < 6 && insn->src[s]) {
+         if (insn->src[s]->value == old_val)
+            nv_reference(pc, insn, s, new_val);
+         ++s;
+      }
+   }
+
+   /* stamp the block before recursing so it is not visited again */
+   b->pass_seq = pc->pass_seq;
+
+   for (i = 0; i < 2; ++i) {
+      struct nv_basic_block *succ = b->out[i];
+
+      if (succ && succ->pass_seq < pc->pass_seq)
+         bld_replace_value(pc, succ, old_val, new_val);
+   }
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
new file mode 100644 (file)
index 0000000..adf9192
--- /dev/null
@@ -0,0 +1,322 @@
+
+#include "util/u_format.h"
+
+#include "nvc0_context.h"
+#include "nvc0_transfer.h"
+
+#include "nv50_defs.xml.h"
+
+/* Driver transfer object: the generic pipe_transfer plus the two rectangles
+ * copied between by the M2MF engine.
+ */
+struct nvc0_transfer {
+   struct pipe_transfer base;
+   struct nvc0_m2mf_rect rect[2]; /* [0]: miptree level, [1]: staging buffer */
+   uint32_t nblocksx; /* width of the transfer box in format blocks */
+   uint32_t nblocksy; /* height of the transfer box in format blocks */
+};
+
+/* Copy a rectangle of nblocksx x nblocksy blocks from src to dst with the
+ * M2MF engine.
+ *
+ * Each side is programmed as tiled when its buffer object has tile_flags set
+ * and as pitch-linear otherwise. Both rectangles must use the same cpp.
+ * The copy is issued in chunks of at most 2047 lines.
+ */
+static void
+nvc0_m2mf_transfer_rect(struct pipe_screen *pscreen,
+                        const struct nvc0_m2mf_rect *dst,
+                        const struct nvc0_m2mf_rect *src,
+                        uint32_t nblocksx, uint32_t nblocksy)
+{
+   struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
+   const int cpp = dst->cpp;
+   uint32_t src_ofst = src->base;
+   uint32_t dst_ofst = dst->base;
+   uint32_t height = nblocksy;
+   uint32_t sy = src->y;
+   uint32_t dy = dst->y;
+   uint32_t exec = (1 << 20);
+
+   assert(dst->cpp == src->cpp);
+
+   /* source layout */
+   if (src->bo->tile_flags) {      
+      BEGIN_RING(chan, RING_MF(TILING_MODE_IN), 5);
+      OUT_RING  (chan, src->tile_mode);
+      OUT_RING  (chan, src->width * cpp);
+      OUT_RING  (chan, src->height);
+      OUT_RING  (chan, src->depth);
+      OUT_RING  (chan, src->z);
+   } else {
+      /* linear: fold the start position into the byte offset */
+      src_ofst += src->y * src->pitch + src->x * cpp;
+
+      BEGIN_RING(chan, RING_MF(PITCH_IN), 1);
+      OUT_RING  (chan, src->width * cpp);
+
+      exec |= NVC0_M2MF_EXEC_LINEAR_IN;
+   }
+
+   /* destination layout */
+   if (dst->bo->tile_flags) {
+      BEGIN_RING(chan, RING_MF(TILING_MODE_OUT), 5);
+      OUT_RING  (chan, dst->tile_mode);
+      OUT_RING  (chan, dst->width * cpp);
+      OUT_RING  (chan, dst->height);
+      OUT_RING  (chan, dst->depth);
+      OUT_RING  (chan, dst->z);
+   } else {
+      /* linear: fold the start position into the byte offset */
+      dst_ofst += dst->y * dst->pitch + dst->x * cpp;
+
+      BEGIN_RING(chan, RING_MF(PITCH_OUT), 1);
+      OUT_RING  (chan, dst->width * cpp);
+
+      exec |= NVC0_M2MF_EXEC_LINEAR_OUT;
+   }
+
+   while (height) {
+      /* one EXEC covers at most 2047 lines */
+      int line_count = height > 2047 ? 2047 : height;
+
+      MARK_RING (chan, 17, 4);
+
+      BEGIN_RING(chan, RING_MF(OFFSET_IN_HIGH), 2);
+      OUT_RELOCh(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
+      OUT_RELOCl(chan, src->bo, src_ofst, src->domain | NOUVEAU_BO_RD);
+
+      BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+      OUT_RELOCh(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
+      OUT_RELOCl(chan, dst->bo, dst_ofst, dst->domain | NOUVEAU_BO_WR);
+
+      /* tiled sides get an explicit position, linear sides advance the
+       * byte offset for the next chunk instead
+       */
+      if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) {
+         BEGIN_RING(chan, RING_MF(TILING_POSITION_IN_X), 2);
+         OUT_RING  (chan, src->x * cpp);
+         OUT_RING  (chan, sy);
+      } else {
+         src_ofst += line_count * src->pitch;
+      }
+      if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) {
+         BEGIN_RING(chan, RING_MF(TILING_POSITION_OUT_X), 2);
+         OUT_RING  (chan, dst->x * cpp);
+         OUT_RING  (chan, dy);
+      } else {
+         dst_ofst += line_count * dst->pitch;
+      }
+
+      BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+      OUT_RING  (chan, nblocksx * cpp);
+      OUT_RING  (chan, line_count);
+      BEGIN_RING(chan, RING_MF(EXEC), 1);
+      OUT_RING  (chan, exec);
+
+      height -= line_count;
+      sy += line_count;
+      dy += line_count;
+   }
+}
+
+/* Upload size bytes from data into dst at offset by pushing the payload
+ * inline through the M2MF DATA method. The payload is sent in 32-bit words,
+ * so (size + 3) / 4 words are read from data.
+ */
+void
+nvc0_m2mf_push_linear(struct nvc0_context *nvc0,
+                      struct nouveau_bo *dst, unsigned domain, int offset,
+                      unsigned size, void *data)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   uint32_t *words = (uint32_t *)data;
+   unsigned remaining = (size + 3) / 4;
+
+   BEGIN_RING(chan, RING_MF(OFFSET_OUT_HIGH), 2);
+   OUT_RELOCh(chan, dst, offset, domain | NOUVEAU_BO_WR);
+   OUT_RELOCl(chan, dst, offset, domain | NOUVEAU_BO_WR);
+   BEGIN_RING(chan, RING_MF(LINE_LENGTH_IN), 2);
+   OUT_RING  (chan, size);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_MF(EXEC), 1);
+   OUT_RING  (chan, 0x100111);
+
+   while (remaining) {
+      unsigned space = AVAIL_RING(chan);
+      unsigned chunk;
+
+      if (space >= 9) {
+         /* one word of the available space goes to the packet header */
+         chunk = MIN2(remaining, space - 1);
+         chunk = MIN2(chunk, NV04_PFIFO_MAX_PACKET_LEN);
+
+         BEGIN_RING_NI(chan, RING_MF(DATA), chunk);
+         OUT_RINGp (chan, words, chunk);
+
+         words += chunk;
+         remaining -= chunk;
+      } else {
+         /* too little room left: submit and retry */
+         FIRE_RING(chan);
+      }
+   }
+}
+
+/* Program the 2D engine for an upload of nblocksx x nblocksy blocks from
+ * user memory (SIFC) to position (dst->x, dst->y) of dst.
+ *
+ * NOTE(review): only the state setup is implemented. The rows pointed to by
+ * src are walked but their contents are not yet written to the channel, and
+ * the linear destination path programs A8R8G8B8_UNORM rather than dst_format
+ * - confirm the intended behaviour before using this function.
+ */
+static void
+nvc0_sifc_push_rect(struct pipe_screen *pscreen,
+                    const struct nvc0_m2mf_rect *dst, unsigned dst_format,
+                    unsigned src_format, unsigned src_pitch, void *src,
+                    unsigned nblocksx, unsigned nblocksy)
+{
+   /* obtained the same way as in nvc0_m2mf_transfer_rect; the pointer was
+    * previously used without ever being initialized
+    */
+   struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
+
+   if (dst->bo->tile_flags) {
+      BEGIN_RING(chan, RING_2D(DST_FORMAT), 5);
+      OUT_RING  (chan, dst_format);
+      OUT_RING  (chan, 0);
+      OUT_RING  (chan, dst->tile_mode);
+      OUT_RING  (chan, 1);
+      OUT_RING  (chan, 0);
+   } else {
+      BEGIN_RING(chan, RING_2D(DST_FORMAT), 2);
+      OUT_RING  (chan, NV50_SURFACE_FORMAT_A8R8G8B8_UNORM);
+      OUT_RING  (chan, 1);
+      BEGIN_RING(chan, RING_2D(DST_PITCH), 1);
+      OUT_RING  (chan, dst->pitch);
+   }
+
+   BEGIN_RING(chan, RING_2D(DST_WIDTH), 4);
+   OUT_RING  (chan, dst->width);
+   OUT_RING  (chan, dst->height);
+   OUT_RELOCh(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+   OUT_RELOCl(chan, dst->bo, dst->base, dst->domain | NOUVEAU_BO_WR);
+
+   BEGIN_RING(chan, RING_2D(SIFC_BITMAP_ENABLE), 2);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, src_format);
+   BEGIN_RING(chan, RING_2D(SIFC_WIDTH), 10);
+   OUT_RING  (chan, nblocksx);
+   OUT_RING  (chan, nblocksy);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, 1);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, 1);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, dst->x);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, dst->y);
+
+   /* Step through the source rows. The row counter is now decremented so
+    * the loop terminates; it previously spun forever for nblocksy != 0.
+    * TODO: emit each row's data here.
+    */
+   while (nblocksy--) {
+      src = (uint8_t *)src + src_pitch;
+   }
+}
+
+/* Create a transfer for region box of miptree level sr.level (cube face
+ * sr.face for cube textures).
+ *
+ * rect[0] describes the miptree level in VRAM, rect[1] a newly allocated
+ * GART staging buffer of nblocksy * stride bytes. When usage contains
+ * PIPE_TRANSFER_READ the level contents are copied to the staging buffer
+ * right away.
+ *
+ * Returns the new transfer, or NULL if an allocation fails.
+ */
+struct pipe_transfer *
+nvc0_miptree_transfer_new(struct pipe_context *pctx,
+                          struct pipe_resource *res,
+                          struct pipe_subresource sr,
+                          unsigned usage,
+                          const struct pipe_box *box)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct pipe_screen *pscreen = pctx->screen;
+   struct nouveau_device *dev = nvc0->screen->base.device;
+   struct nvc0_miptree *mt = nvc0_miptree(res);
+   struct nvc0_miptree_level *lvl = &mt->level[sr.level];
+   struct nvc0_transfer *tx;
+   uint32_t image;
+   uint32_t w, h, z;
+   int ret;
+
+   if (res->target == PIPE_TEXTURE_CUBE)
+      image = sr.face;
+   else
+      image = 0;
+
+   tx = CALLOC_STRUCT(nvc0_transfer);
+   if (!tx)
+      return NULL;
+
+   pipe_resource_reference(&tx->base.resource, res);
+
+   tx->base.sr = sr;
+   tx->base.usage = usage;
+   tx->base.box = *box;
+
+   tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+   tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+
+   tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+
+   w = u_minify(res->width0, sr.level);
+   h = u_minify(res->height0, sr.level);
+
+   tx->rect[0].cpp = tx->rect[1].cpp = util_format_get_blocksize(res->format);
+
+   tx->rect[0].bo = mt->base.bo;
+   tx->rect[0].base = lvl->image_offset[image];
+   tx->rect[0].tile_mode = lvl->tile_mode;
+   tx->rect[0].x = util_format_get_nblocksx(res->format, box->x);
+   /* vertical quantities must be converted with the block height */
+   tx->rect[0].y = util_format_get_nblocksy(res->format, box->y);
+   tx->rect[0].z = box->z;
+   tx->rect[0].width = util_format_get_nblocksx(res->format, w);
+   tx->rect[0].height = util_format_get_nblocksy(res->format, h);
+   tx->rect[0].depth = res->depth0;
+   tx->rect[0].pitch = lvl->pitch;
+   tx->rect[0].domain = NOUVEAU_BO_VRAM;
+
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+                        tx->nblocksy * tx->base.stride, &tx->rect[1].bo);
+   if (ret) {
+      /* drop the reference taken above, otherwise the resource leaks */
+      pipe_resource_reference(&tx->base.resource, NULL);
+      FREE(tx);
+      return NULL;
+   }
+
+   tx->rect[1].width = tx->nblocksx;
+   tx->rect[1].height = tx->nblocksy;
+   tx->rect[1].depth = box->depth;
+   tx->rect[1].pitch = tx->base.stride;
+   tx->rect[1].domain = NOUVEAU_BO_GART;
+
+   /* NOTE(review): the staging buffer is sized for a single layer and
+    * rect[1].base is not advanced per layer, so for box->depth > 1 every
+    * layer is copied to the same staging offset - confirm whether boxes with
+    * more than one layer are expected here.
+    */
+   if (usage & PIPE_TRANSFER_READ) {
+      for (z = 0; z < box->depth; ++z) {
+         nvc0_m2mf_transfer_rect(pscreen, &tx->rect[1], &tx->rect[0],
+                                 tx->nblocksx, tx->nblocksy);
+         tx->rect[0].z++;
+      }
+   }
+   /* rewind to the first layer for the write-back at transfer destruction */
+   tx->rect[0].z = box->z;
+
+   return &tx->base;
+}
+
+/* Destroy a transfer. For PIPE_TRANSFER_WRITE transfers the staging buffer
+ * is first copied back into the miptree, once per layer of the box; then the
+ * staging buffer and the resource reference are released.
+ */
+void
+nvc0_miptree_transfer_del(struct pipe_context *pctx,
+                          struct pipe_transfer *transfer)
+{
+   struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+
+   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+      struct pipe_screen *pscreen = pctx->screen;
+      unsigned layer = 0;
+
+      while (layer < tx->base.box.depth) {
+         nvc0_m2mf_transfer_rect(pscreen, &tx->rect[0], &tx->rect[1],
+                                 tx->nblocksx, tx->nblocksy);
+         tx->rect[0].z++;
+         ++layer;
+      }
+   }
+
+   nouveau_bo_ref(NULL, &tx->rect[1].bo);
+   pipe_resource_reference(&transfer->resource, NULL);
+
+   FREE(tx);
+}
+
+/* Return a CPU pointer to the transfer's staging buffer, mapping it with
+ * access flags derived from the transfer usage if it is not mapped yet.
+ * Returns NULL when mapping fails.
+ */
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pctx,
+                          struct pipe_transfer *transfer)
+{
+   struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+   unsigned flags;
+
+   /* already mapped: hand out the existing pointer */
+   if (tx->rect[1].bo->map)
+      return tx->rect[1].bo->map;
+
+   flags = (transfer->usage & PIPE_TRANSFER_READ) ? NOUVEAU_BO_RD : 0;
+   if (transfer->usage & PIPE_TRANSFER_WRITE)
+      flags |= NOUVEAU_BO_WR;
+
+   if (nouveau_bo_map(tx->rect[1].bo, flags))
+      return NULL;
+
+   return tx->rect[1].bo->map;
+}
+
+/* Unmap the staging buffer of a transfer. */
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *transfer)
+{
+   struct nouveau_bo *staging = ((struct nvc0_transfer *)transfer)->rect[1].bo;
+
+   nouveau_bo_unmap(staging);
+}
+
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.h b/src/gallium/drivers/nvc0/nvc0_transfer.h
new file mode 100644 (file)
index 0000000..aaebe40
--- /dev/null
@@ -0,0 +1,38 @@
+
+#ifndef __NVC0_TRANSFER_H__
+#define __NVC0_TRANSFER_H__
+
+#include "pipe/p_state.h"
+
+/* Create a transfer for region box of subresource sr of texture pt;
+ * returns NULL on failure.
+ */
+struct pipe_transfer *
+nvc0_miptree_transfer_new(struct pipe_context *pcontext,
+                          struct pipe_resource *pt,
+                          struct pipe_subresource sr,
+                          unsigned usage,
+                          const struct pipe_box *box);
+/* Destroy a transfer, writing modified data back to the texture first. */
+void
+nvc0_miptree_transfer_del(struct pipe_context *pcontext,
+                          struct pipe_transfer *ptx);
+/* Map the transfer's staging buffer; returns NULL on failure. */
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pcontext,
+                          struct pipe_transfer *ptx);
+/* Unmap the transfer's staging buffer. */
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pcontext,
+                            struct pipe_transfer *ptx);
+
+/* One side (source or destination) of a rectangular M2MF copy. */
+struct nvc0_m2mf_rect {
+   struct nouveau_bo *bo; /* buffer object holding the surface */
+   uint32_t base;         /* byte offset of the surface within bo */
+   unsigned domain;       /* NOUVEAU_BO_* placement flags used for relocs */
+   uint32_t pitch;        /* bytes per line, used for linear surfaces */
+   uint32_t width;        /* surface width; multiplied by cpp when emitted */
+   uint32_t x;            /* start column; multiplied by cpp when emitted */
+   uint32_t height;       /* surface height in lines */
+   uint32_t y;            /* start line */
+   uint16_t depth;        /* surface depth, emitted for tiled surfaces */
+   uint16_t z;            /* layer, emitted for tiled surfaces */
+   uint16_t tile_mode;    /* tiling mode, emitted for tiled surfaces */
+   uint16_t cpp;          /* bytes per block */
+};
+
+#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
new file mode 100644 (file)
index 0000000..1fc8422
--- /dev/null
@@ -0,0 +1,462 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+/* Destroy a vertex state object, releasing its translate object if one was
+ * created.
+ */
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe,
+                         void *hwcso)
+{
+   struct nvc0_vertex_stateobj *vtx = (struct nvc0_vertex_stateobj *)hwcso;
+
+   if (vtx->translate != NULL)
+      vtx->translate->release(vtx->translate);
+
+   FREE(vtx);
+}
+
+/* Create a vertex state object for num_elements vertex elements.
+ *
+ * For every element the hardware attribute format is looked up; formats
+ * without a hardware equivalent fall back to a 32-bit float format with the
+ * same number of components. Elements without an instance divisor are also
+ * added to a translate key that converts them into a packed vertex layout;
+ * instanced elements are recorded in instance_bits instead.
+ *
+ * Returns the new state object, or NULL on allocation failure or when an
+ * element format has an unsupported component count.
+ */
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+                         unsigned num_elements,
+                         const struct pipe_vertex_element *elements)
+{
+    struct nvc0_vertex_stateobj *so;
+    struct translate_key transkey;
+    unsigned i;
+
+    assert(num_elements);
+
+    so = MALLOC(sizeof(*so) +
+                (num_elements - 1) * sizeof(struct nvc0_vertex_element));
+    if (!so)
+        return NULL;
+    so->num_elements = num_elements;
+    so->instance_bits = 0;
+
+    transkey.nr_elements = 0;
+    transkey.output_stride = 0;
+
+    for (i = 0; i < num_elements; ++i) {
+        const struct pipe_vertex_element *ve = &elements[i];
+        const unsigned vbi = ve->vertex_buffer_index;
+        enum pipe_format fmt = ve->src_format;
+
+        so->element[i].pipe = elements[i];
+        so->element[i].state = nvc0_format_table[fmt].vtx;
+
+        if (!so->element[i].state) {
+            /* no hardware format: convert to floats via translate */
+            switch (util_format_get_nr_components(fmt)) {
+            case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
+            case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
+            case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
+            case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
+            default:
+                assert(0);
+                /* do not leak the partially built state object */
+                FREE(so);
+                return NULL;
+            }
+            so->element[i].state = nvc0_format_table[fmt].vtx;
+        }
+        so->element[i].state |= i;
+
+        if (likely(!ve->instance_divisor)) {
+            unsigned j = transkey.nr_elements++;
+
+            transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
+            transkey.element[j].input_format = ve->src_format;
+            transkey.element[j].input_buffer = vbi;
+            transkey.element[j].input_offset = ve->src_offset;
+            transkey.element[j].instance_divisor = ve->instance_divisor;
+
+            transkey.element[j].output_format = fmt;
+            transkey.element[j].output_offset = transkey.output_stride;
+            /* keep every output attribute aligned to 4 bytes */
+            transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
+        } else {
+           so->instance_bits |= 1 << i;
+        }
+    }
+
+    so->translate = translate_create(&transkey);
+    so->vtx_size = transkey.output_stride / 4;
+    so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1);
+
+    return so;
+}
+
+/* Attribute format written for unused vertex attribute slots: a constant
+ * 32-bit float. Parenthesized so the expansion stays a single operand when
+ * used inside a larger expression.
+ */
+#define NVC0_3D_VERTEX_ATTRIB_INACTIVE                                       \
+   (NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |                                \
+    NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST)
+
+/* Emit the vertex attribute formats and vertex array bindings for the
+ * currently bound vertex state object, and disable attribute slots that were
+ * active before but are no longer used.
+ *
+ * Also recomputes nvc0->vbo_fifo, the mask of elements whose data has to be
+ * pushed through the FIFO rather than fetched from a buffer.
+ */
+void
+nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
+   struct pipe_vertex_buffer *vb;
+   struct nvc0_vertex_element *ve;
+   unsigned i;
+
+   nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
+
+   nvc0->vbo_fifo = 0;
+
+   BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
+   for (i = 0; i < vertex->num_elements; ++i) {
+      ve = &vertex->element[i];
+      vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
+
+      /* the "|| 1" makes this unconditional: every element is flagged */
+      if (!nvc0_resource_mapped_by_gpu(vb->buffer) || 1)
+         nvc0->vbo_fifo |= 1 << i;
+
+      /* the "1 ||" forces the first branch; the CONST variant is unused */
+      if (1 || likely(vb->stride)) {
+         OUT_RING(chan, ve->state);
+      } else {
+         OUT_RING(chan, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
+      }
+   }
+
+   for (i = 0; i < vertex->num_elements; ++i) {
+      struct nouveau_bo *bo;
+      unsigned size, offset;
+      
+      ve = &vertex->element[i];
+      vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
+
+      /* with any element in FIFO mode, array fetching is turned off for
+       * every slot; the "0 &&" term is disabled and never contributes
+       */
+      if (nvc0->vbo_fifo || (0 && vb->stride == 0)) {
+#if 0
+         if (!nvc0->vbo_fifo)
+            nvc0_vbo_constant_attrib(nvc0, vb, ve);
+#endif
+         BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+         OUT_RING  (chan, 0);
+         continue;
+      }
+
+      bo = nvc0_resource(vb->buffer)->bo;
+      size = vb->buffer->width0;
+      offset = ve->pipe.src_offset + vb->buffer_offset;
+
+      /* only emit PER_INSTANCE when it differs from the last emitted state */
+      if (unlikely(ve->pipe.instance_divisor)) {
+         if (!(nvc0->state.instance_bits & (1 << i))) {
+            INLIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
+         }
+         BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
+         OUT_RING  (chan, ve->pipe.instance_divisor);
+      } else
+      if (unlikely(nvc0->state.instance_bits & (1 << i))) {
+         INLIN_RING(chan, RING_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
+      }
+
+      nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_VERTEX,
+                               nvc0_resource(vb->buffer), NOUVEAU_BO_RD);
+
+      BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+      OUT_RING  (chan, (1 << 12) | vb->stride);
+      BEGIN_RING_1I(chan, RING_3D(VERTEX_ARRAY_SELECT), 5);
+      OUT_RING  (chan, i);
+      OUT_RELOCh(chan, bo, size, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+      OUT_RELOCl(chan, bo, size, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+      OUT_RELOCh(chan, bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+      OUT_RELOCl(chan, bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+   }
+   /* deactivate slots used by the previous state but not by this one */
+   for (; i < nvc0->state.num_vtxelts; ++i) {
+      BEGIN_RING(chan, RING_3D(VERTEX_ATTRIB_FORMAT(i)), 1);
+      OUT_RING  (chan, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
+      BEGIN_RING(chan, RING_3D(VERTEX_ARRAY_FETCH(i)), 1);
+      OUT_RING  (chan, 0);
+   }
+
+   nvc0->state.num_vtxelts = vertex->num_elements;
+   nvc0->state.instance_bits = vertex->instance_bits;
+}
+
+#define NVC0_PRIM_GL_CASE(n) \
+   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+   switch (prim) {
+   NVC0_PRIM_GL_CASE(POINTS);
+   NVC0_PRIM_GL_CASE(LINES);
+   NVC0_PRIM_GL_CASE(LINE_LOOP);
+   NVC0_PRIM_GL_CASE(LINE_STRIP);
+   NVC0_PRIM_GL_CASE(TRIANGLES);
+   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+   NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+   NVC0_PRIM_GL_CASE(QUADS);
+   NVC0_PRIM_GL_CASE(QUAD_STRIP);
+   NVC0_PRIM_GL_CASE(POLYGON);
+   NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+   /*
+   NVC0_PRIM_GL_CASE(PATCHES); */
+   default:
+      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+      break;
+   }
+}
+
+/* Channel flush callback installed while a draw is being emitted: re-emits
+ * the buffer context relocations of the context stored in user_private.
+ */
+static void
+nvc0_draw_vbo_flush_notify(struct nouveau_channel *chan)
+{
+   struct nvc0_context *ctx = chan->user_private;
+
+   nvc0_bufctx_emit_relocs(ctx);
+
+   debug_printf("%s(%p)\n", __FUNCTION__, ctx);
+}
+
+#if 0
+/* Disabled experiment: allocate a 4096 byte GART buffer, fill the start of
+ * it with 0xee and program transform feedback buffer 0 to write into it.
+ * Returns the buffer, or NULL on failure.
+ *
+ * NOTE(review): if nouveau_bo_map fails, tfb is returned as NULL without
+ * being unreferenced - fix before enabling this code.
+ */
+static struct nouveau_bo *
+nvc0_tfb_setup(struct nvc0_context *nvc0)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   struct nouveau_bo *tfb = NULL;
+   int ret, i;
+
+   ret = nouveau_bo_new(nvc0->screen->base.device,
+                        NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, &tfb);
+   if (ret)
+      return NULL;
+
+   ret = nouveau_bo_map(tfb, NOUVEAU_BO_WR);
+   if (ret)
+      return NULL;
+   /* recognisable fill pattern for the first 8 * 4 * 3 bytes */
+   memset(tfb->map, 0xee, 8 * 4 * 3);
+   nouveau_bo_unmap(tfb);
+
+   BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(0)), 5);
+   OUT_RING  (chan, 1);
+   OUT_RELOCh(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+   OUT_RELOCl(chan, tfb, 0, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+   OUT_RING  (chan, tfb->size);
+   OUT_RING  (chan, 0); /* TFB_PRIMITIVE_ID(0) */
+   BEGIN_RING(chan, RING_3D(TFB_UNK0700(0)), 3);
+   OUT_RING  (chan, 0);
+   OUT_RING  (chan, 8); /* TFB_VARYING_COUNT(0) */
+   OUT_RING  (chan, 32); /* TFB_BUFFER_STRIDE(0) */
+   BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(0)), 2);
+   OUT_RING  (chan, 0x1f1e1d1c);
+   OUT_RING  (chan, 0xa3a2a1a0);
+   /* only buffer 0 is used; turn the remaining three off */
+   for (i = 1; i < 4; ++i) {
+      BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(i)), 1);
+      OUT_RING  (chan, 0);
+   }
+   BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D_(0x135c), 1);
+   OUT_RING  (chan, 1);
+   BEGIN_RING(chan, RING_3D_(0x135c), 1);
+   OUT_RING  (chan, 0);
+
+   return tfb;
+}
+#endif
+
+/* Emit a non-indexed draw of count vertices starting at start, repeated
+ * instance_count times. The instance base is only re-emitted when it differs
+ * from the cached value. While the draw is being emitted the channel's flush
+ * callback re-emits the buffer relocations.
+ */
+static void
+nvc0_draw_arrays(struct nvc0_context *nvc0,
+                 unsigned mode, unsigned start, unsigned count,
+                 unsigned start_instance, unsigned instance_count)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   unsigned prim = nvc0_prim_gl(mode);
+   unsigned inst;
+
+   chan->flush_notify = nvc0_draw_vbo_flush_notify;
+   chan->user_private = nvc0;
+
+   if (start_instance != nvc0->state.instance_base) {
+      nvc0->state.instance_base = start_instance;
+      BEGIN_RING(chan, RING_3D(VB_INSTANCE_BASE), 1);
+      OUT_RING  (chan, start_instance);
+   }
+
+   for (inst = 0; inst < instance_count; ++inst) {
+      BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+      OUT_RING  (chan, prim);
+      BEGIN_RING(chan, RING_3D(VERTEX_BUFFER_FIRST), 2);
+      OUT_RING  (chan, start);
+      OUT_RING  (chan, count);
+      INLIN_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+      /* every draw after the first one advances to the next instance */
+      prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+   }
+
+   chan->flush_notify = NULL;
+}
+
+/* Push count 8-bit indices, starting at map[start], inline into the channel.
+ *
+ * The first (count & 3) indices are sent one per word through
+ * VB_ELEMENT_U32; the remainder is sent four indices per word through
+ * VB_ELEMENT_U8.
+ */
+static void
+nvc0_draw_elements_inline_u08(struct nouveau_channel *chan, uint8_t *map,
+                              unsigned start, unsigned count)
+{
+   map += start;
+
+   if (count & 3) {
+      unsigned i;
+      BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), count & 3);
+      for (i = 0; i < (count & 3); ++i)
+         OUT_RING(chan, *map++);
+      count &= ~3;
+   }
+   while (count) {
+      /* nr counts 32-bit words, each carrying four indices */
+      unsigned i, nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN & ~3) * 4) / 4;
+
+      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U8), nr);
+      for (i = 0; i < nr; ++i) {
+         /* widen before shifting so bit 31 is never shifted into an int */
+         OUT_RING(chan,
+                  ((uint32_t)map[3] << 24) | ((uint32_t)map[2] << 16) |
+                  ((uint32_t)map[1] << 8) | (uint32_t)map[0]);
+         map += 4;
+      }
+      /* consumed nr words = nr * 4 indices */
+      count -= nr * 4;
+   }
+}
+
+/* Push count 16-bit indices, starting at map[start], inline into the
+ * channel.
+ *
+ * An odd leading index is sent on its own through VB_ELEMENT_U32; the
+ * remainder is sent two indices per word through VB_ELEMENT_U16.
+ */
+static void
+nvc0_draw_elements_inline_u16(struct nouveau_channel *chan, uint16_t *map,
+                              unsigned start, unsigned count)
+{
+   map += start;
+
+   if (count & 1) {
+      count &= ~1;
+      BEGIN_RING(chan, RING_3D(VB_ELEMENT_U32), 1);
+      OUT_RING  (chan, *map++);
+   }
+   while (count) {
+      /* nr counts 32-bit words, each carrying two indices */
+      unsigned i, nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN & ~1) * 2) / 2;
+
+      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U16), nr);
+      for (i = 0; i < nr; ++i) {
+         /* widen before shifting so bit 31 is never shifted into an int */
+         OUT_RING(chan, ((uint32_t)map[1] << 16) | (uint32_t)map[0]);
+         map += 2;
+      }
+      /* consumed nr words = nr * 2 indices */
+      count -= nr * 2;
+   }
+}
+
+/* Push count 32-bit indices, starting at map[start], inline into the channel
+ * in packets of at most NV04_PFIFO_MAX_PACKET_LEN words.
+ */
+static void
+nvc0_draw_elements_inline_u32(struct nouveau_channel *chan, uint32_t *map,
+                              unsigned start, unsigned count)
+{
+   const uint32_t *idx = map + start;
+   unsigned left = count;
+
+   while (left) {
+      const unsigned chunk = MIN2(left, NV04_PFIFO_MAX_PACKET_LEN);
+      unsigned k;
+
+      BEGIN_RING_NI(chan, RING_3D(VB_ELEMENT_U32), chunk);
+      for (k = 0; k < chunk; ++k)
+         OUT_RING(chan, idx[k]);
+
+      idx += chunk;
+      left -= chunk;
+   }
+}
+
+/* Emit an indexed draw by pushing the indices inline into the channel.
+ *
+ * The bound index buffer is mapped for reading, count indices starting at
+ * start are emitted once per instance, and the buffer is unmapped again.
+ * index_size selects the index width in bytes (1, 2 or 4).
+ *
+ * NOTE(review): start_instance and index_bias are accepted but not used
+ * here - confirm whether they need to be programmed for this path.
+ */
+static void
+nvc0_draw_elements(struct nvc0_context *nvc0,
+                   unsigned mode, unsigned start, unsigned count,
+                   unsigned start_instance, unsigned instance_count,
+                   unsigned index_size, int index_bias)
+{
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+   void *data;
+   struct pipe_transfer *transfer;
+   unsigned prim;
+
+   chan->flush_notify = nvc0_draw_vbo_flush_notify;
+   chan->user_private = nvc0;
+
+   prim = nvc0_prim_gl(mode);
+
+   data = pipe_buffer_map(&nvc0->pipe,
+                          nvc0->idxbuf.buffer, PIPE_TRANSFER_READ, &transfer);
+   if (!data) {
+      /* do not leave the draw-time callback installed */
+      chan->flush_notify = NULL;
+      return;
+   }
+
+   while (instance_count--) {
+      BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+      OUT_RING  (chan, prim);
+      switch (index_size) {
+      case 1:
+         nvc0_draw_elements_inline_u08(chan, data, start, count);
+         break;
+      case 2:
+         nvc0_draw_elements_inline_u16(chan, data, start, count);
+         break;
+      case 4:
+         nvc0_draw_elements_inline_u32(chan, data, start, count);
+         break;
+      default:
+         assert(0);
+         goto out;
+      }
+      BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1);
+      OUT_RING  (chan, 0);
+
+      /* as in nvc0_draw_arrays: later draws advance to the next instance */
+      prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+   }
+
+out:
+   /* release the mapping obtained above on every exit path */
+   pipe_buffer_unmap(&nvc0->pipe, nvc0->idxbuf.buffer, transfer);
+
+   chan->flush_notify = NULL;
+}
+
+/* pipe_context draw entry point: validates state, then dispatches to the
+ * FIFO push path, the array draw path or the indexed draw path.
+ */
+void
+nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+   nvc0_state_validate(nvc0);
+
+   /* some vertex data has to go through the FIFO: use the push path */
+   if (nvc0->vbo_fifo) {
+      nvc0_push_vbo(nvc0, info);
+      return;
+   }
+
+   if (nvc0->vbo_dirty) {
+      BEGIN_RING(nvc0->screen->base.channel, RING_3D_(0x142c), 1);
+      OUT_RING  (nvc0->screen->base.channel, 0);
+      nvc0->vbo_dirty = FALSE;
+   }
+
+   if (info->indexed) {
+      if (!nvc0->idxbuf.buffer) {
+         NOUVEAU_ERR("draw_indexed: no index buffer\n");
+         return;
+      }
+      nvc0_draw_elements(nvc0,
+                         info->mode, info->start, info->count,
+                         info->start_instance, info->instance_count,
+                         nvc0->idxbuf.index_size, info->index_bias);
+      return;
+   }
+
+   nvc0_draw_arrays(nvc0,
+                    info->mode, info->start, info->count,
+                    info->start_instance, info->instance_count);
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
new file mode 100644 (file)
index 0000000..48ea876
--- /dev/null
@@ -0,0 +1,152 @@
+
+#ifndef __NVC0_WINSYS_H__
+#define __NVC0_WINSYS_H__
+
+#include <stdint.h>
+#include <unistd.h>
+#include "pipe/p_defines.h"
+
+#include "nouveau/nouveau_bo.h"
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_device.h"
+#include "nouveau/nouveau_resource.h"
+#include "nouveau/nouveau_reloc.h"
+
+/* Fallback value, used only when no earlier header defined the macro.
+ * NOTE(review): presumably the maximum payload length of one FIFO packet -
+ * confirm against the hardware headers.
+ */
+#ifndef NV04_PFIFO_MAX_PACKET_LEN
+#define NV04_PFIFO_MAX_PACKET_LEN 2047
+#endif
+
+/* Thin alias for usleep(); the argument is forwarded unchanged. */
+#define SLEEP(us) usleep(us)
+
+/* Declared here, defined elsewhere (not visible in this header). */
+extern uint64_t nouveau_bo_gpu_address(struct nouveau_bo *);
+
+/* Subchannel numbers; the RING_* macros below shift them to bit 13. */
+#define NVC0_SUBCH_3D 1
+#define NVC0_SUBCH_2D 2
+#define NVC0_SUBCH_MF 3
+
+/* Token-pastes a method suffix onto the NVC0_M2MF_ prefix. */
+#define NVC0_MF_(n) NVC0_M2MF_##n
+
+/* Build the method field of a command header from a method NAME suffix:
+ * subchannel number at bit 13, ORed with the method constant shifted right
+ * by two.
+ */
+#define RING_3D(n) ((NVC0_SUBCH_3D << 13) | (NVC0_3D_##n >> 2))
+#define RING_2D(n) ((NVC0_SUBCH_2D << 13) | (NVC0_2D_##n >> 2))
+#define RING_MF(n) ((NVC0_SUBCH_MF << 13) | (NVC0_MF_(n) >> 2))
+
+/* Same encoding as above, but taking a raw numeric method value. */
+#define RING_3D_(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2))
+#define RING_2D_(m) ((NVC0_SUBCH_2D << 13) | ((m) >> 2))
+#define RING_MF_(m) ((NVC0_SUBCH_MF << 13) | ((m) >> 2))
+
+/* NOTE(review): despite the name this expands exactly like RING_3D_(m),
+ * i.e. it always selects the 3D subchannel - confirm that is intended.
+ */
+#define RING_ANY(m) ((NVC0_SUBCH_3D << 13) | ((m) >> 2))
+
+/* Implemented outside this header; called by WAIT_RING and FIRE_RING. */
+int nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min);
+
+/* Ensure room for `size` more entries in the push buffer: when cur + size
+ * would pass chan->end, nouveau_pushbuf_flush() is called with `size` as
+ * its second argument. The flush return value is not checked.
+ */
+static INLINE void
+WAIT_RING(struct nouveau_channel *chan, unsigned size)
+{
+   /* Enough space left - nothing to do. */
+   if (chan->cur + size <= chan->end)
+      return;
+
+   nouveau_pushbuf_flush(chan, size);
+}
+
+/* Store one 32-bit value at the push buffer cursor and advance the cursor.
+ * No space check is done here; callers reserve room beforehand.
+ */
+static INLINE void
+OUT_RING(struct nouveau_channel *chan, uint32_t data)
+{
+   *chan->cur = data;
+   ++chan->cur;
+}
+
+/* Incremental methods: reserve size + 1 entries, then emit a header word
+ * made of mode 0x2 at bit 28, `size` at bit 16 and `mthd` in the low bits.
+ * The caller emits the `size` data words afterwards.
+ */
+static INLINE void
+BEGIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+   const uint32_t header = (0x2 << 28) | (size << 16) | mthd;
+
+   WAIT_RING(chan, size + 1);
+   OUT_RING(chan, header);
+}
+
+/* Non-incremental methods: same layout as BEGIN_RING but with mode 0x6 at
+ * bit 28. Reserves size + 1 entries before writing the header word.
+ */
+static INLINE void
+BEGIN_RING_NI(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+   const uint32_t header = (0x6 << 28) | (size << 16) | mthd;
+
+   WAIT_RING(chan, size + 1);
+   OUT_RING(chan, header);
+}
+
+/* Increment-once methods: reserve size + 1 entries, then emit a header word
+ * made of mode 0xa at bit 28, `size` at bit 16 and `mthd` in the low bits.
+ * The caller emits the `size` data words afterwards.
+ */
+static INLINE void
+BEGIN_RING_1I(struct nouveau_channel *chan, uint32_t mthd, unsigned size)
+{
+   /* The mode constant must be unsigned: 0xa << 28 does not fit in a signed
+    * 32-bit int, so shifting the plain int literal is undefined behaviour.
+    */
+   const uint32_t header = (0xau << 28) | (size << 16) | mthd;
+
+   WAIT_RING(chan, size + 1);
+   OUT_RING(chan, header);
+}
+
+/* Inline-data method: emits a single word and no payload. The word holds
+ * mode 0x8 at bit 28, `data` at bit 16 and `mthd` in the low bits; only one
+ * push buffer entry is reserved.
+ */
+static INLINE void
+INLIN_RING(struct nouveau_channel *chan, uint32_t mthd, unsigned data)
+{
+   /* The mode constant must be unsigned: 0x8 << 28 is 2^31, which a signed
+    * 32-bit int cannot represent, so shifting the int literal is undefined
+    * behaviour.
+    */
+   const uint32_t header = (0x8u << 28) | (data << 16) | mthd;
+
+   WAIT_RING(chan, 1);
+   OUT_RING(chan, header);
+}
+
+/* Push buffer entry points implemented outside this header. MARK_RING and
+ * OUT_RELOC below are inline wrappers around the first two; nothing in this
+ * header calls nouveau_pushbuf_submit().
+ */
+int
+nouveau_pushbuf_marker_emit(struct nouveau_channel *chan,
+                            unsigned wait_dwords, unsigned wait_relocs);
+int
+nouveau_pushbuf_emit_reloc(struct nouveau_channel *, void *ptr,
+                           struct nouveau_bo *, uint32_t data, uint32_t data2,
+                           uint32_t flags, uint32_t vor, uint32_t tor);
+int
+nouveau_pushbuf_submit(struct nouveau_channel *chan, struct nouveau_bo *bo,
+                       unsigned offset, unsigned length);
+
+/* Forwards to nouveau_pushbuf_marker_emit() with the given dword and reloc
+ * counts and returns its result unchanged.
+ */
+static INLINE int
+MARK_RING(struct nouveau_channel *chan, unsigned dwords, unsigned relocs)
+{
+   const int ret = nouveau_pushbuf_marker_emit(chan, dwords, relocs);
+
+   return ret;
+}
+
+/* Emit a float into the push buffer as its raw 32-bit pattern. The bits are
+ * reinterpreted through a union rather than converted numerically.
+ */
+static INLINE void
+OUT_RINGf(struct nouveau_channel *chan, float data)
+{
+   union {
+      float f;
+      uint32_t i;
+   } bits;
+
+   bits.f = data;
+   OUT_RING(chan, bits.i);
+}
+
+/* Number of entries left between the cursor and the end of the push
+ * buffer, i.e. chan->end - chan->cur converted to unsigned.
+ */
+static INLINE unsigned
+AVAIL_RING(struct nouveau_channel *chan)
+{
+   const unsigned remaining = chan->end - chan->cur;
+
+   return remaining;
+}
+
+/* Bulk emit: copy size * 4 bytes from `data` to the push buffer cursor and
+ * advance the cursor by `size` entries. No space check is done here.
+ */
+static INLINE void
+OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned size)
+{
+   const unsigned bytes = size * 4;
+
+   memcpy(chan->cur, data, bytes);
+   chan->cur += size;
+}
+
+/* Emit a relocation: claims the current push buffer entry (the cursor is
+ * advanced by one) and passes its address to nouveau_pushbuf_emit_reloc().
+ * The data2 argument is always 0. Returns that call's result.
+ */
+static INLINE int
+OUT_RELOC(struct nouveau_channel *chan, struct nouveau_bo *bo,
+          unsigned data, unsigned flags, unsigned vor, unsigned tor)
+{
+   void *slot = chan->cur++;
+
+   return nouveau_pushbuf_emit_reloc(chan, slot, bo,
+                                     data, 0, flags, vor, tor);
+}
+
+/* OUT_RELOC with NOUVEAU_BO_LOW added to the flags and both vor and tor
+ * set to 0. Returns OUT_RELOC's result.
+ */
+static INLINE int
+OUT_RELOCl(struct nouveau_channel *chan, struct nouveau_bo *bo,
+           unsigned delta, unsigned flags)
+{
+   const unsigned low_flags = flags | NOUVEAU_BO_LOW;
+
+   return OUT_RELOC(chan, bo, delta, low_flags, 0, 0);
+}
+
+/* OUT_RELOC with NOUVEAU_BO_HIGH added to the flags and both vor and tor
+ * set to 0. Returns OUT_RELOC's result.
+ */
+static INLINE int
+OUT_RELOCh(struct nouveau_channel *chan, struct nouveau_bo *bo,
+           unsigned delta, unsigned flags)
+{
+   const unsigned high_flags = flags | NOUVEAU_BO_HIGH;
+
+   return OUT_RELOC(chan, bo, delta, high_flags, 0, 0);
+}
+
+/* Flush the push buffer now by calling nouveau_pushbuf_flush() with a
+ * minimum of 0. The flush result is discarded.
+ */
+static INLINE void
+FIRE_RING(struct nouveau_channel *chan)
+{
+   (void)nouveau_pushbuf_flush(chan, 0);
+}
+
+#endif
index 2f64f312b841cdad42c8f7120ddd8ac10fd4a5b4..eb1ee859a00389a594b18c1b5ded32fcec1332ca 100644 (file)
@@ -10,6 +10,7 @@ PIPE_DRIVERS = \
        $(TOP)/src/gallium/drivers/rbug/librbug.a \
        $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
        $(TOP)/src/gallium/drivers/nv50/libnv50.a \
+       $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
        $(TOP)/src/gallium/drivers/nouveau/libnouveau.a
 
 C_SOURCES = \
index d4bf124ce6f2c28ae43831098b2298734cdb0d48..648d6c8a8e21fdb8196ccbf0739de6e140db6182 100644 (file)
@@ -50,6 +50,9 @@ nouveau_drm_screen_create(int fd)
        case 0xa0:
                init = nv50_screen_create;
                break;
+       case 0xc0:
+               init = nvc0_screen_create;
+               break;
        default:
                debug_printf("%s: unknown chipset nv%02x\n", __func__,
                             dev->chipset);