Merge remote branch 'origin/master' into nv50-compiler
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Thu, 2 Sep 2010 16:31:49 +0000 (18:31 +0200)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Thu, 2 Sep 2010 16:31:49 +0000 (18:31 +0200)
Conflicts:
src/gallium/drivers/nv50/nv50_program.c

23 files changed:
src/gallium/drivers/nouveau/nouveau_class.h
src/gallium/drivers/nv50/Makefile
src/gallium/drivers/nv50/SConscript
src/gallium/drivers/nv50/nv50_formats.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_miptree.c
src/gallium/drivers/nv50/nv50_pc.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_pc.h [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_pc_emit.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_pc_optimize.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_pc_print.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_pc_regalloc.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_program.c
src/gallium/drivers/nv50/nv50_program.h
src/gallium/drivers/nv50/nv50_push.c
src/gallium/drivers/nv50/nv50_screen.c
src/gallium/drivers/nv50/nv50_screen.h
src/gallium/drivers/nv50/nv50_shader_state.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_state.c
src/gallium/drivers/nv50/nv50_state_validate.c
src/gallium/drivers/nv50/nv50_tex.c
src/gallium/drivers/nv50/nv50_texture.h
src/gallium/drivers/nv50/nv50_tgsi_to_nc.c [new file with mode: 0644]
src/gallium/drivers/nv50/nv50_vbo.c

index 20941f379c770de2a0a9130e2606cb59873b522b..d9f35b4c4b9683813c5e228d02bae4702f786d7d 100644 (file)
@@ -735,6 +735,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH                                  0x0000023c
 
 
+#define NVC0_MEMORY_TO_MEMORY_FORMAT                                                   0x00009039
+
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_NOP                                              0x00000100
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_SERIALIZE                                                0x00000110
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_IN                                   0x00000204
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_IN                                  0x00000208
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_IN                                 0x0000020c
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_IN                                  0x00000210
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Z                             0x00000214
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_OUT                                  0x00000220
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_OUT                                 0x00000224
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_OUT                                        0x00000228
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_OUT                                 0x0000022c
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Z                            0x00000230
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH                                  0x00000238
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_LOW                                   0x0000023c
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC                                             0x00000300
+#define   NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_PUSH                                       (1 <<  0)
+#define   NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_IN                                  (1 <<  4)
+#define   NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_OUT                                 (1 <<  8)
+#define   NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_NOTIFY                                     (1 << 13)
+#define   NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_INC_SHIFT                                  20
+#define   NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_INC_MASK                                   0x00f00000
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_DATA                                             0x00000304
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH                                   0x0000030c
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_LOW                                    0x00000310
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_PITCH_IN                                         0x00000314
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT                                                0x00000318
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN                                   0x0000031c
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT                                       0x00000320
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY_ADDRESS_HIGH                              0x0000032c
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY_ADDRESS_LOW                               0x00000330
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY                                           0x00000334
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_X                             0x00000344
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Y                             0x00000348
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_X                            0x0000034c
+#define  NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Y                            0x00000350
+
+
 #define NV01_MEMORY_LOCAL_BANKED                                                       0x0000003d
 
 
@@ -4507,6 +4546,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   NV20TCL_VTXFMT_TYPE_SHIFT                                                    0
 #define   NV20TCL_VTXFMT_TYPE_MASK                                                     0x0000000f
 #define    NV20TCL_VTXFMT_TYPE_FLOAT                                                   0x00000002
+#define    NV20TCL_VTXFMT_TYPE_HALF                                                    0x00000003
 #define    NV20TCL_VTXFMT_TYPE_UBYTE                                                   0x00000004
 #define    NV20TCL_VTXFMT_TYPE_USHORT                                                  0x00000005
 #define   NV20TCL_VTXFMT_SIZE_SHIFT                                                    4
@@ -6999,6 +7039,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   NV40TCL_VTXFMT_TYPE_SHIFT                                                    0
 #define   NV40TCL_VTXFMT_TYPE_MASK                                                     0x0000000f
 #define    NV40TCL_VTXFMT_TYPE_FLOAT                                                   0x00000002
+#define    NV40TCL_VTXFMT_TYPE_HALF                                                    0x00000003
 #define    NV40TCL_VTXFMT_TYPE_UBYTE                                                   0x00000004
 #define    NV40TCL_VTXFMT_TYPE_USHORT                                                  0x00000005
 #define   NV40TCL_VTXFMT_SIZE_SHIFT                                                    4
@@ -7708,7 +7749,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV50TCL_DMA_TIC                                                               0x000001a0
 #define  NV50TCL_DMA_TEXTURE                                                           0x000001a4
 #define  NV50TCL_DMA_STRMOUT                                                           0x000001a8
-#define  NV50TCL_DMA_UNK01AC                                                           0x000001ac
+#define  NV50TCL_DMA_CLIPID                                                            0x000001ac
 #define  NV50TCL_DMA_COLOR(x)                                                          (0x000001c0+((x)*4))
 #define  NV50TCL_DMA_COLOR__SIZE                                                       0x00000008
 #define  NV50TCL_RT_ADDRESS_HIGH(x)                                                    (0x00000200+((x)*32))
@@ -7925,8 +7966,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV50TCL_DEPTH_RANGE_FAR__SIZE                                                 0x00000010
 #define  NV50TCL_VIEWPORT_CLIP_HORIZ(x)                                                        (0x00000d00+((x)*8))
 #define  NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE                                             0x00000008
+#define   NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT                                                0
+#define   NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_MASK                                         0x0000ffff
+#define   NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT                                                16
+#define   NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_MASK                                         0xffff0000
 #define  NV50TCL_VIEWPORT_CLIP_VERT(x)                                                 (0x00000d04+((x)*8))
 #define  NV50TCL_VIEWPORT_CLIP_VERT__SIZE                                              0x00000008
+#define   NV50TCL_VIEWPORT_CLIP_VERT_MIN_SHIFT                                         0
+#define   NV50TCL_VIEWPORT_CLIP_VERT_MIN_MASK                                          0x0000ffff
+#define   NV50TCL_VIEWPORT_CLIP_VERT_MAX_SHIFT                                         16
+#define   NV50TCL_VIEWPORT_CLIP_VERT_MAX_MASK                                          0xffff0000
+#define  NV50TCL_CLIPID_REGION_HORIZ(x)                                                        (0x00000d40+((x)*8))
+#define  NV50TCL_CLIPID_REGION_HORIZ__SIZE                                             0x00000004
+#define  NV50TCL_CLIPID_REGION_VERT(x)                                                 (0x00000d44+((x)*8))
+#define  NV50TCL_CLIPID_REGION_VERT__SIZE                                              0x00000004
 #define  NV50TCL_VERTEX_BUFFER_FIRST                                                   0x00000d74
 #define  NV50TCL_VERTEX_BUFFER_COUNT                                                   0x00000d78
 #define  NV50TCL_CLEAR_COLOR(x)                                                                (0x00000d80+((x)*4))
@@ -7984,14 +8037,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV50TCL_GP_ADDRESS_LOW                                                                0x00000f74
 #define  NV50TCL_VP_ADDRESS_HIGH                                                       0x00000f7c
 #define  NV50TCL_VP_ADDRESS_LOW                                                                0x00000f80
-#define  NV50TCL_UNK0F84_ADDRESS_HIGH                                                  0x00000f84
-#define  NV50TCL_UNK0F84_ADDRESS_LOW                                                   0x00000f88
+#define  NV50TCL_VERTEX_RUNOUT_HIGH                                                    0x00000f84
+#define  NV50TCL_VERTEX_RUNOUT_LOW                                                     0x00000f88
 #define  NV50TCL_DEPTH_BOUNDS(x)                                                       (0x00000f9c+((x)*4))
 #define  NV50TCL_DEPTH_BOUNDS__SIZE                                                    0x00000002
 #define  NV50TCL_FP_ADDRESS_HIGH                                                       0x00000fa4
 #define  NV50TCL_FP_ADDRESS_LOW                                                                0x00000fa8
 #define  NV50TCL_MSAA_MASK(x)                                                          (0x00000fbc+((x)*4))
 #define  NV50TCL_MSAA_MASK__SIZE                                                       0x00000004
+#define  NV50TCL_CLIPID_ADDRESS_HIGH                                                   0x00000fcc
+#define  NV50TCL_CLIPID_ADDRESS_LOW                                                    0x00000fd0
 #define  NV50TCL_ZETA_ADDRESS_HIGH                                                     0x00000fe0
 #define  NV50TCL_ZETA_ADDRESS_LOW                                                      0x00000fe4
 #define  NV50TCL_ZETA_FORMAT                                                           0x00000fe8
@@ -8121,37 +8176,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT                                     0x0000800a
 #define   NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT                             0x0000800b
 #define  NV50TCL_BLEND_FUNC_SRC_RGB                                                    0x00001344
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_ZERO                                              0x00000000
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE                                               0x00000001
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR                                         0x00000300
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR                               0x00000301
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA                                         0x00000302
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA                               0x00000303
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA                                         0x00000304
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA                               0x00000305
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR                                         0x00000306
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR                               0x00000307
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE                                        0x00000308
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR                                    0x00008001
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR                          0x00008002
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA                                    0x00008003
-#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA                          0x00008004
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ZERO                                              0x00004000
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE                                               0x00004001
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR                                         0x00004300
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR                               0x00004301
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA                                         0x00004302
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA                               0x00004303
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA                                         0x00004304
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA                               0x00004305
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR                                         0x00004306
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR                               0x00004307
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE                                        0x00004308
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR                                    0x0000c001
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR                          0x0000c002
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA                                    0x0000c003
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA                          0x0000c004
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR                                                0x0000c900
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR                              0x0000c901
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA                                                0x0000c902
+#define   NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA                              0x0000c903
 #define  NV50TCL_BLEND_FUNC_DST_RGB                                                    0x00001348
-#define   NV50TCL_BLEND_FUNC_DST_RGB_ZERO                                              0x00000000
-#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE                                               0x00000001
-#define   NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR                                         0x00000300
-#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR                               0x00000301
-#define   NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA                                         0x00000302
-#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA                               0x00000303
-#define   NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA                                         0x00000304
-#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA                               0x00000305
-#define   NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR                                         0x00000306
-#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR                               0x00000307
-#define   NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE                                        0x00000308
-#define   NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR                                    0x00008001
-#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR                          0x00008002
-#define   NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA                                    0x00008003
-#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA                          0x00008004
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ZERO                                              0x00004000
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE                                               0x00004001
+#define   NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR                                         0x00004300
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR                               0x00004301
+#define   NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA                                         0x00004302
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA                               0x00004303
+#define   NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA                                         0x00004304
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA                               0x00004305
+#define   NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR                                         0x00004306
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR                               0x00004307
+#define   NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE                                        0x00004308
+#define   NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR                                    0x0000c001
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR                          0x0000c002
+#define   NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA                                    0x0000c003
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA                          0x0000c004
+#define   NV50TCL_BLEND_FUNC_DST_RGB_SRC1_COLOR                                                0x0000c900
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_COLOR                              0x0000c901
+#define   NV50TCL_BLEND_FUNC_DST_RGB_SRC1_ALPHA                                                0x0000c902
+#define   NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_ALPHA                              0x0000c903
 #define  NV50TCL_BLEND_EQUATION_ALPHA                                                  0x0000134c
 #define   NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD                                                0x00008006
 #define   NV50TCL_BLEND_EQUATION_ALPHA_MIN                                             0x00008007
@@ -8159,37 +8222,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT                                   0x0000800a
 #define   NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT                           0x0000800b
 #define  NV50TCL_BLEND_FUNC_SRC_ALPHA                                                  0x00001350
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO                                            0x00000000
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE                                             0x00000001
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR                                       0x00000300
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR                             0x00000301
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA                                       0x00000302
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA                             0x00000303
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA                                       0x00000304
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA                             0x00000305
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR                                       0x00000306
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR                             0x00000307
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE                              0x00000308
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR                                  0x00008001
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR                                0x00008002
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA                                  0x00008003
-#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA                                0x00008004
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO                                            0x00004000
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE                                             0x00004001
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR                                       0x00004300
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR                             0x00004301
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA                                       0x00004302
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA                             0x00004303
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA                                       0x00004304
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA                             0x00004305
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR                                       0x00004306
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR                             0x00004307
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE                              0x00004308
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR                                  0x0000c001
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR                                0x0000c002
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA                                  0x0000c003
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA                                0x0000c004
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_COLOR                                      0x0000c900
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_COLOR                            0x0000c901
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_ALPHA                                      0x0000c902
+#define   NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA                            0x0000c903
 #define  NV50TCL_BLEND_FUNC_DST_ALPHA                                                  0x00001358
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO                                            0x00000000
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE                                             0x00000001
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR                                       0x00000300
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR                             0x00000301
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA                                       0x00000302
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA                             0x00000303
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA                                       0x00000304
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA                             0x00000305
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR                                       0x00000306
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR                             0x00000307
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE                              0x00000308
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR                                  0x00008001
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR                                0x00008002
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA                                  0x00008003
-#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA                                0x00008004
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO                                            0x00004000
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE                                             0x00004001
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR                                       0x00004300
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR                             0x00004301
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA                                       0x00004302
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA                             0x00004303
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA                                       0x00004304
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA                             0x00004305
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR                                       0x00004306
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR                             0x00004307
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE                              0x00004308
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR                                  0x0000c001
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR                                0x0000c002
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA                                  0x0000c003
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA                                0x0000c004
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_COLOR                                      0x0000c900
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR                            0x0000c901
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA                                      0x0000c902
+#define   NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA                            0x0000c903
 #define  NV50TCL_BLEND_ENABLE(x)                                                       (0x00001360+((x)*4))
 #define  NV50TCL_BLEND_ENABLE__SIZE                                                    0x00000008
 #define  NV50TCL_STENCIL_FRONT_ENABLE                                                  0x00001380
@@ -8248,6 +8319,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV50TCL_FP_START_ID                                                           0x00001414
 #define  NV50TCL_GP_VERTEX_OUTPUT_COUNT                                                        0x00001420
 #define  NV50TCL_VB_ELEMENT_BASE                                                       0x00001434
+#define  NV50TCL_INSTANCE_BASE                                                         0x00001438
 #define  NV50TCL_CODE_CB_FLUSH                                                         0x00001440
 #define  NV50TCL_BIND_TSC(x)                                                           (0x00001444+((x)*8))
 #define  NV50TCL_BIND_TSC__SIZE                                                                0x00000003
@@ -8265,6 +8337,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   NV50TCL_BIND_TIC_TIC_MASK                                                    0x7ffffe00
 #define  NV50TCL_STRMOUT_MAP(x)                                                                (0x00001480+((x)*4))
 #define  NV50TCL_STRMOUT_MAP__SIZE                                                     0x00000020
+#define  NV50TCL_CLIPID_HEIGHT                                                         0x00001504
 #define  NV50TCL_VP_CLIP_DISTANCE_ENABLE                                               0x00001510
 #define   NV50TCL_VP_CLIP_DISTANCE_ENABLE_0                                            (1 <<  0)
 #define   NV50TCL_VP_CLIP_DISTANCE_ENABLE_1                                            (1 <<  1)
@@ -8349,7 +8422,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV50TCL_GP_BUILTIN_RESULT_EN                                                  0x000015cc
 #define   NV50TCL_GP_BUILTIN_RESULT_EN_VPORT_IDX                                       (1 <<  0)
 #define   NV50TCL_GP_BUILTIN_RESULT_EN_LAYER_IDX                                       (1 << 16)
-#define  NV50TCL_MULTISAMPLE_SAMPLES_LOG2                                              0x000015d0
+#define  NV50TCL_MULTISAMPLE_MODE                                                      0x000015d0
+#define   NV50TCL_MULTISAMPLE_MODE_1X                                                  0x00000000
+#define   NV50TCL_MULTISAMPLE_MODE_2XMS                                                        0x00000001
+#define   NV50TCL_MULTISAMPLE_MODE_4XMS                                                        0x00000002
+#define   NV50TCL_MULTISAMPLE_MODE_8XMS                                                        0x00000004
+#define   NV50TCL_MULTISAMPLE_MODE_4XMS_4XCS                                           0x00000008
+#define   NV50TCL_MULTISAMPLE_MODE_4XMS_12XCS                                          0x00000009
+#define   NV50TCL_MULTISAMPLE_MODE_8XMS_8XCS                                           0x0000000a
 #define  NV50TCL_VERTEX_BEGIN                                                          0x000015dc
 #define   NV50TCL_VERTEX_BEGIN_POINTS                                                  0x00000000
 #define   NV50TCL_VERTEX_BEGIN_LINES                                                   0x00000001
@@ -8365,6 +8445,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY                                    0x0000000b
 #define   NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY                                     0x0000000c
 #define   NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY                                        0x0000000d
+#define   NV50TCL_VERTEX_BEGIN_PATCHES                                                 0x0000000e
 #define  NV50TCL_VERTEX_END                                                            0x000015e0
 #define  NV50TCL_EDGEFLAG_ENABLE                                                       0x000015e4
 #define  NV50TCL_VB_ELEMENT_U32                                                                0x000015e8
@@ -8378,6 +8459,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   NV50TCL_VB_ELEMENT_U16_I0_MASK                                               0x0000ffff
 #define   NV50TCL_VB_ELEMENT_U16_I1_SHIFT                                              16
 #define   NV50TCL_VB_ELEMENT_U16_I1_MASK                                               0xffff0000
+#define  NV50TCL_VERTEX_BASE_HIGH                                                      0x000015f4
+#define  NV50TCL_VERTEX_BASE_LOW                                                       0x000015f8
 #define  NV50TCL_VERTEX_DATA                                                           0x00001640
 #define  NV50TCL_PRIM_RESTART_ENABLE                                                   0x00001644
 #define  NV50TCL_PRIM_RESTART_INDEX                                                    0x00001648
@@ -8763,7 +8846,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV50TCL_VIEWPORT_TRANSFORM_EN                                                 0x0000192c
 #define  NV50TCL_VIEW_VOLUME_CLIP_CTRL                                                 0x0000193c
 #define  NV50TCL_VIEWPORT_CLIP_RECTS_EN                                                        0x0000194c
+#define  NV50TCL_VIEWPORT_CLIP_MODE                                                    0x00001950
+#define   NV50TCL_VIEWPORT_CLIP_MODE_INCLUDE                                           0x00000000
+#define   NV50TCL_VIEWPORT_CLIP_MODE_EXCLUDE                                           0x00000001
+#define   NV50TCL_VIEWPORT_CLIP_MODE_UNKNOWN                                           0x00000002
 #define  NV50TCL_FP_CTRL_UNK196C                                                       0x0000196c
+#define  NV50TCL_CLIPID_ENABLE                                                         0x0000197c
+#define  NV50TCL_CLIPID_WIDTH                                                          0x00001980
+#define  NV50TCL_CLIPID_ID                                                             0x00001984
 #define  NV50TCL_FP_INTERPOLANT_CTRL                                                   0x00001988
 #define   NV50TCL_FP_INTERPOLANT_CTRL_UMASK_SHIFT                                      24
 #define   NV50TCL_FP_INTERPOLANT_CTRL_UMASK_MASK                                       0xff000000
@@ -8864,19 +8954,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define    NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8                                      0x00c00000
 #define    NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16                                       0x00d80000
 #define    NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8                                                0x00e80000
+#define    NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10                               0x01800000
 #define   NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SHIFT                                       25
-#define   NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK                                                0x7e000000
-#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT                                      0x7e000000
-#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM                                      0x24000000
-#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM                                      0x12000000
-#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED                                    0x5a000000
-#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED                                    0x6c000000
-#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT                                       0x48000000
-#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT                                       0x36000000
+#define   NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK                                                0x0e000000
+#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT                                      0x0e000000
+#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM                                      0x02000000
+#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM                                      0x04000000
+#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED                                    0x0a000000
+#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED                                    0x0c000000
+#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT                                       0x08000000
+#define    NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT                                       0x06000000
 #define   NV50TCL_VERTEX_ARRAY_ATTRIB_BGRA                                             (1 << 31)
 #define  NV50TCL_QUERY_ADDRESS_HIGH                                                    0x00001b00
 #define  NV50TCL_QUERY_ADDRESS_LOW                                                     0x00001b04
-#define  NV50TCL_QUERY_COUNTER                                                         0x00001b08
+#define  NV50TCL_QUERY_SEQUENCE                                                                0x00001b08
 #define  NV50TCL_QUERY_GET                                                             0x00001b0c
 
 
@@ -9031,4 +9122,938 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define  NV50_COMPUTE_USER_PARAM__SIZE                                                 0x00000040
 
 
+#define NVC0TCL                                                                                0x00009097
+
+#define  NVC0TCL_SEMAPHORE_ADDRESS_HIGH                                                        0x00000010
+#define  NVC0TCL_SEMAPHORE_ADDRESS_LOW                                                 0x00000014
+#define  NVC0TCL_NOP                                                                   0x00000100
+#define  NVC0TCL_NOTIFY_ADDRESS_HIGH                                                   0x00000104
+#define  NVC0TCL_NOTIFY_ADDRESS_LOW                                                    0x00000108
+#define  NVC0TCL_NOTIFY                                                                        0x0000010c
+#define  NVC0TCL_SERIALIZE                                                             0x00000110
+#define  NVC0TCL_EARLY_FRAGMENT_TESTS                                                  0x00000210
+#define  NVC0TCL_TESS_MODE                                                             0x00000320
+#define   NVC0TCL_TESS_MODE_PRIM_SHIFT                                                 0
+#define   NVC0TCL_TESS_MODE_PRIM_MASK                                                  0x0000000f
+#define    NVC0TCL_TESS_MODE_PRIM_ISOLINES                                             0x00000000
+#define    NVC0TCL_TESS_MODE_PRIM_TRIANGLES                                            0x00000001
+#define    NVC0TCL_TESS_MODE_PRIM_QUADS                                                        0x00000002
+#define   NVC0TCL_TESS_MODE_SPACING_SHIFT                                              4
+#define   NVC0TCL_TESS_MODE_SPACING_MASK                                               0x000000f0
+#define    NVC0TCL_TESS_MODE_SPACING_EQUAL                                             0x00000000
+#define    NVC0TCL_TESS_MODE_SPACING_FRACTIONAL_ODD                                    0x00000010
+#define    NVC0TCL_TESS_MODE_SPACING_FRACTIONAL_EVEN                                   0x00000020
+#define   NVC0TCL_TESS_MODE_CW                                                         (1 <<  8)
+#define   NVC0TCL_TESS_MODE_CONNECTED                                                  (1 <<  9)
+#define  NVC0TCL_TESS_LEVEL_OUTER(x)                                                   (0x00000324+((x)*4))
+#define  NVC0TCL_TESS_LEVEL_OUTER__SIZE                                                        0x00000004
+#define  NVC0TCL_TESS_LEVEL_INNER(x)                                                   (0x00000334+((x)*4))
+#define  NVC0TCL_TESS_LEVEL_INNER__SIZE                                                        0x00000002
+#define  NVC0TCL_RASTERIZE_ENABLE                                                      0x0000037c
+#define  NVC0TCL_TFB_BUFFER_ENABLE(x)                                                  (0x00000380+((x)*32))
+#define  NVC0TCL_TFB_BUFFER_ENABLE__SIZE                                               0x00000004
+#define  NVC0TCL_TFB_ADDRESS_HIGH(x)                                                   (0x00000384+((x)*32))
+#define  NVC0TCL_TFB_ADDRESS_HIGH__SIZE                                                        0x00000004
+#define  NVC0TCL_TFB_ADDRESS_LOW(x)                                                    (0x00000388+((x)*32))
+#define  NVC0TCL_TFB_ADDRESS_LOW__SIZE                                                 0x00000004
+#define  NVC0TCL_TFB_BUFFER_SIZE(x)                                                    (0x0000038c+((x)*32))
+#define  NVC0TCL_TFB_BUFFER_SIZE__SIZE                                                 0x00000004
+#define  NVC0TCL_TFB_PRIMITIVE_ID(x)                                                   (0x00000390+((x)*32))
+#define  NVC0TCL_TFB_PRIMITIVE_ID__SIZE                                                        0x00000004
+#define  NVC0TCL_TFB_UNK0700(x)                                                                (0x00000700+((x)*16))
+#define  NVC0TCL_TFB_UNK0700__SIZE                                                     0x00000004
+#define  NVC0TCL_TFB_VARYING_COUNT(x)                                                  (0x00000704+((x)*16))
+#define  NVC0TCL_TFB_VARYING_COUNT__SIZE                                               0x00000004
+#define  NVC0TCL_TFB_BUFFER_STRIDE(x)                                                  (0x00000708+((x)*16))
+#define  NVC0TCL_TFB_BUFFER_STRIDE__SIZE                                               0x00000004
+#define  NVC0TCL_TFB_ENABLE                                                            0x00000744
+#define  NVC0TCL_LOCAL_BASE                                                            0x0000077c
+#define  NVC0TCL_UNK0790_ADDRESS_HIGH                                                  0x00000790
+#define  NVC0TCL_UNK0790_ADDRESS_LOW                                                   0x00000794
+#define  NVC0TCL_RT_ADDRESS_HIGH(x)                                                    (0x00000800+((x)*32))
+#define  NVC0TCL_RT_ADDRESS_HIGH__SIZE                                                 0x00000008
+#define  NVC0TCL_RT_ADDRESS_LOW(x)                                                     (0x00000804+((x)*32))
+#define  NVC0TCL_RT_ADDRESS_LOW__SIZE                                                  0x00000008
+#define  NVC0TCL_RT_HORIZ(x)                                                           (0x00000808+((x)*32))
+#define  NVC0TCL_RT_HORIZ__SIZE                                                                0x00000008
+#define  NVC0TCL_RT_VERT(x)                                                            (0x0000080c+((x)*32))
+#define  NVC0TCL_RT_VERT__SIZE                                                         0x00000008
+#define  NVC0TCL_RT_FORMAT(x)                                                          (0x00000810+((x)*32))
+#define  NVC0TCL_RT_FORMAT__SIZE                                                       0x00000008
+#define   NVC0TCL_RT_FORMAT_R32G32B32A32_FLOAT                                         0x000000c0
+#define   NVC0TCL_RT_FORMAT_R32G32B32A32_SINT                                          0x000000c1
+#define   NVC0TCL_RT_FORMAT_R32G32B32A32_UINT                                          0x000000c2
+#define   NVC0TCL_RT_FORMAT_R32G32B32X32_FLOAT                                         0x000000c3
+#define   NVC0TCL_RT_FORMAT_R16G16B16A16_UNORM                                         0x000000c6
+#define   NVC0TCL_RT_FORMAT_R16G16B16A16_SNORM                                         0x000000c7
+#define   NVC0TCL_RT_FORMAT_R16G16B16A16_SINT                                          0x000000c8
+#define   NVC0TCL_RT_FORMAT_R16G16B16A16_UINT                                          0x000000c9
+#define   NVC0TCL_RT_FORMAT_R16G16B16A16_FLOAT                                         0x000000ca
+#define   NVC0TCL_RT_FORMAT_R32G32_FLOAT                                               0x000000cb
+#define   NVC0TCL_RT_FORMAT_R32G32_SINT                                                        0x000000cc
+#define   NVC0TCL_RT_FORMAT_R32G32_UINT                                                        0x000000cd
+#define   NVC0TCL_RT_FORMAT_R16G16B16X16_FLOAT                                         0x000000ce
+#define   NVC0TCL_RT_FORMAT_A8R8G8B8_UNORM                                             0x000000cf
+#define   NVC0TCL_RT_FORMAT_A8R8G8B8_SRGB                                              0x000000d0
+#define   NVC0TCL_RT_FORMAT_A2B10G10R10_UNORM                                          0x000000d1
+#define   NVC0TCL_RT_FORMAT_A2B10G10R10_UINT                                           0x000000d2
+#define   NVC0TCL_RT_FORMAT_A8B8G8R8_UNORM                                             0x000000d5
+#define   NVC0TCL_RT_FORMAT_A8B8G8R8_SRGB                                              0x000000d6
+#define   NVC0TCL_RT_FORMAT_A8B8G8R8_SNORM                                             0x000000d7
+#define   NVC0TCL_RT_FORMAT_A8B8G8R8_SINT                                              0x000000d8
+#define   NVC0TCL_RT_FORMAT_A8B8G8R8_UINT                                              0x000000d9
+#define   NVC0TCL_RT_FORMAT_R16G16_UNORM                                               0x000000da
+#define   NVC0TCL_RT_FORMAT_R16G16_SNORM                                               0x000000db
+#define   NVC0TCL_RT_FORMAT_R16G16_SINT                                                        0x000000dc
+#define   NVC0TCL_RT_FORMAT_R16G16_UINT                                                        0x000000dd
+#define   NVC0TCL_RT_FORMAT_R16G16_FLOAT                                               0x000000de
+#define   NVC0TCL_RT_FORMAT_A2R10G10B10_UNORM                                          0x000000df
+#define   NVC0TCL_RT_FORMAT_B10G11R11_FLOAT                                            0x000000e0
+#define   NVC0TCL_RT_FORMAT_R32_FLOAT                                                  0x000000e5
+#define   NVC0TCL_RT_FORMAT_X8R8G8B8_UNORM                                             0x000000e6
+#define   NVC0TCL_RT_FORMAT_X8R8G8B8_SRGB                                              0x000000e7
+#define   NVC0TCL_RT_FORMAT_R5G6B5_UNORM                                               0x000000e8
+#define   NVC0TCL_RT_FORMAT_A1R5G5B5_UNORM                                             0x000000e9
+#define   NVC0TCL_RT_FORMAT_R8G8_UNORM                                                 0x000000ea
+#define   NVC0TCL_RT_FORMAT_R8G8_SNORM                                                 0x000000eb
+#define   NVC0TCL_RT_FORMAT_R8G8_SINT                                                  0x000000ec
+#define   NVC0TCL_RT_FORMAT_R8G8_UINT                                                  0x000000ed
+#define   NVC0TCL_RT_FORMAT_R16_UNORM                                                  0x000000ee
+#define   NVC0TCL_RT_FORMAT_R16_SNORM                                                  0x000000ef
+#define   NVC0TCL_RT_FORMAT_R16_SINT                                                   0x000000f0
+#define   NVC0TCL_RT_FORMAT_R16_UINT                                                   0x000000f1
+#define   NVC0TCL_RT_FORMAT_R16_FLOAT                                                  0x000000f2
+#define   NVC0TCL_RT_FORMAT_R8_UNORM                                                   0x000000f3
+#define   NVC0TCL_RT_FORMAT_R8_SNORM                                                   0x000000f4
+#define   NVC0TCL_RT_FORMAT_R8_SINT                                                    0x000000f5
+#define   NVC0TCL_RT_FORMAT_R8_UINT                                                    0x000000f6
+#define   NVC0TCL_RT_FORMAT_A8_UNORM                                                   0x000000f7
+#define   NVC0TCL_RT_FORMAT_X1R5G5B5_UNORM                                             0x000000f8
+#define   NVC0TCL_RT_FORMAT_X8B8G8R8_UNORM                                             0x000000f9
+#define   NVC0TCL_RT_FORMAT_X8B8G8R8_SRGB                                              0x000000fa
+#define  NVC0TCL_RT_TILE_MODE(x)                                                       (0x00000814+((x)*32))
+#define  NVC0TCL_RT_TILE_MODE__SIZE                                                    0x00000008
+#define  NVC0TCL_RT_ARRAY_MODE(x)                                                      (0x00000818+((x)*32))
+#define  NVC0TCL_RT_ARRAY_MODE__SIZE                                                   0x00000008
+#define   NVC0TCL_RT_ARRAY_MODE_LAYERS_SHIFT                                           0
+#define   NVC0TCL_RT_ARRAY_MODE_LAYERS_MASK                                            0x0000ffff
+#define   NVC0TCL_RT_ARRAY_MODE_VOLUME                                                 (1 << 16)
+#define  NVC0TCL_RT_LAYER_STRIDE(x)                                                    (0x0000081c+((x)*32))
+#define  NVC0TCL_RT_LAYER_STRIDE__SIZE                                                 0x00000008
+#define  NVC0TCL_VIEWPORT_SCALE_X(x)                                                   (0x00000a00+((x)*32))
+#define  NVC0TCL_VIEWPORT_SCALE_X__SIZE                                                        0x00000010
+#define  NVC0TCL_VIEWPORT_SCALE_Y(x)                                                   (0x00000a04+((x)*32))
+#define  NVC0TCL_VIEWPORT_SCALE_Y__SIZE                                                        0x00000010
+#define  NVC0TCL_VIEWPORT_SCALE_Z(x)                                                   (0x00000a08+((x)*32))
+#define  NVC0TCL_VIEWPORT_SCALE_Z__SIZE                                                        0x00000010
+#define  NVC0TCL_VIEWPORT_TRANSLATE_X(x)                                               (0x00000a0c+((x)*32))
+#define  NVC0TCL_VIEWPORT_TRANSLATE_X__SIZE                                            0x00000010
+#define  NVC0TCL_VIEWPORT_TRANSLATE_Y(x)                                               (0x00000a10+((x)*32))
+#define  NVC0TCL_VIEWPORT_TRANSLATE_Y__SIZE                                            0x00000010
+#define  NVC0TCL_VIEWPORT_TRANSLATE_Z(x)                                               (0x00000a14+((x)*32))
+#define  NVC0TCL_VIEWPORT_TRANSLATE_Z__SIZE                                            0x00000010
+#define  NVC0TCL_VIEWPORT_HORIZ(x)                                                     (0x00000c00+((x)*16))
+#define  NVC0TCL_VIEWPORT_HORIZ__SIZE                                                  0x00000010
+#define   NVC0TCL_VIEWPORT_HORIZ_X_SHIFT                                               0
+#define   NVC0TCL_VIEWPORT_HORIZ_X_MASK                                                        0x0000ffff
+#define   NVC0TCL_VIEWPORT_HORIZ_W_SHIFT                                               16
+#define   NVC0TCL_VIEWPORT_HORIZ_W_MASK                                                        0xffff0000
+#define  NVC0TCL_VIEWPORT_VERT(x)                                                      (0x00000c04+((x)*16))
+#define  NVC0TCL_VIEWPORT_VERT__SIZE                                                   0x00000010
+#define   NVC0TCL_VIEWPORT_VERT_Y_SHIFT                                                        0
+#define   NVC0TCL_VIEWPORT_VERT_Y_MASK                                                 0x0000ffff
+#define   NVC0TCL_VIEWPORT_VERT_H_SHIFT                                                        16
+#define   NVC0TCL_VIEWPORT_VERT_H_MASK                                                 0xffff0000
+#define  NVC0TCL_DEPTH_RANGE_NEAR(x)                                                   (0x00000c08+((x)*16))
+#define  NVC0TCL_DEPTH_RANGE_NEAR__SIZE                                                        0x00000010
+#define  NVC0TCL_DEPTH_RANGE_FAR(x)                                                    (0x00000c0c+((x)*16))
+#define  NVC0TCL_DEPTH_RANGE_FAR__SIZE                                                 0x00000010
+#define  NVC0TCL_VIEWPORT_CLIP_HORIZ(x)                                                        (0x00000d00+((x)*8))
+#define  NVC0TCL_VIEWPORT_CLIP_HORIZ__SIZE                                             0x00000008
+#define   NVC0TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT                                                0
+#define   NVC0TCL_VIEWPORT_CLIP_HORIZ_MIN_MASK                                         0x0000ffff
+#define   NVC0TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT                                                16
+#define   NVC0TCL_VIEWPORT_CLIP_HORIZ_MAX_MASK                                         0xffff0000
+#define  NVC0TCL_VIEWPORT_CLIP_VERT(x)                                                 (0x00000d04+((x)*8))
+#define  NVC0TCL_VIEWPORT_CLIP_VERT__SIZE                                              0x00000008
+#define   NVC0TCL_VIEWPORT_CLIP_VERT_MIN_SHIFT                                         0
+#define   NVC0TCL_VIEWPORT_CLIP_VERT_MIN_MASK                                          0x0000ffff
+#define   NVC0TCL_VIEWPORT_CLIP_VERT_MAX_SHIFT                                         16
+#define   NVC0TCL_VIEWPORT_CLIP_VERT_MAX_MASK                                          0xffff0000
+#define  NVC0TCL_CLIPID_REGION_HORIZ(x)                                                        (0x00000d40+((x)*8))
+#define  NVC0TCL_CLIPID_REGION_HORIZ__SIZE                                             0x00000004
+#define  NVC0TCL_CLIPID_REGION_VERT(x)                                                 (0x00000d44+((x)*8))
+#define  NVC0TCL_CLIPID_REGION_VERT__SIZE                                              0x00000004
+#define  NVC0TCL_VERTEX_BUFFER_FIRST                                                   0x00000d74
+#define  NVC0TCL_VERTEX_BUFFER_COUNT                                                   0x00000d78
+#define  NVC0TCL_CLEAR_COLOR(x)                                                                (0x00000d80+((x)*4))
+#define  NVC0TCL_CLEAR_COLOR__SIZE                                                     0x00000004
+#define  NVC0TCL_CLEAR_DEPTH                                                           0x00000d90
+#define  NVC0TCL_STACK_ADDRESS_HIGH                                                    0x00000d94
+#define  NVC0TCL_STACK_ADDRESS_LOW                                                     0x00000d98
+#define  NVC0TCL_STACK_SIZE_LOG                                                                0x00000d9c
+#define  NVC0TCL_CLEAR_STENCIL                                                         0x00000da0
+#define  NVC0TCL_POLYGON_SMOOTH_ENABLE                                                 0x00000db4
+#define  NVC0TCL_POLYGON_OFFSET_POINT_ENABLE                                           0x00000dc0
+#define  NVC0TCL_POLYGON_OFFSET_LINE_ENABLE                                            0x00000dc4
+#define  NVC0TCL_POLYGON_OFFSET_FILL_ENABLE                                            0x00000dc8
+#define  NVC0TCL_PATCH_VERTICES                                                                0x00000dcc
+#define  NVC0TCL_WATCHDOG_TIMER                                                                0x00000de4
+#define  NVC0TCL_WINDOW_OFFSET_X                                                       0x00000df8
+#define  NVC0TCL_WINDOW_OFFSET_Y                                                       0x00000dfc
+#define  NVC0TCL_SCISSOR_ENABLE(x)                                                     (0x00000e00+((x)*16))
+#define  NVC0TCL_SCISSOR_ENABLE__SIZE                                                  0x00000010
+#define  NVC0TCL_SCISSOR_HORIZ(x)                                                      (0x00000e04+((x)*16))
+#define  NVC0TCL_SCISSOR_HORIZ__SIZE                                                   0x00000010
+#define   NVC0TCL_SCISSOR_HORIZ_MIN_SHIFT                                              0
+#define   NVC0TCL_SCISSOR_HORIZ_MIN_MASK                                               0x0000ffff
+#define   NVC0TCL_SCISSOR_HORIZ_MAX_SHIFT                                              16
+#define   NVC0TCL_SCISSOR_HORIZ_MAX_MASK                                               0xffff0000
+#define  NVC0TCL_SCISSOR_VERT(x)                                                       (0x00000e08+((x)*16))
+#define  NVC0TCL_SCISSOR_VERT__SIZE                                                    0x00000010
+#define   NVC0TCL_SCISSOR_VERT_MIN_SHIFT                                               0
+#define   NVC0TCL_SCISSOR_VERT_MIN_MASK                                                        0x0000ffff
+#define   NVC0TCL_SCISSOR_VERT_MAX_SHIFT                                               16
+#define   NVC0TCL_SCISSOR_VERT_MAX_MASK                                                        0xffff0000
+#define  NVC0TCL_LOCAL_WARPS_LOG_ALLOC                                                 0x00000f44
+#define  NVC0TCL_LOCAL_WARPS_NO_CLAMP                                                  0x00000f48
+#define  NVC0TCL_STACK_WARPS_LOG_ALLOC                                                 0x00000f4c
+#define  NVC0TCL_STACK_WARPS_NO_CLAMP                                                  0x00000f50
+#define  NVC0TCL_STENCIL_BACK_FUNC_REF                                                 0x00000f54
+#define  NVC0TCL_STENCIL_BACK_MASK                                                     0x00000f58
+#define  NVC0TCL_STENCIL_BACK_FUNC_MASK                                                        0x00000f5c
+#define  NVC0TCL_VERTEX_RUNOUT_HIGH                                                    0x00000f84
+#define  NVC0TCL_VERTEX_RUNOUT_LOW                                                     0x00000f88
+#define  NVC0TCL_DEPTH_BOUNDS(x)                                                       (0x00000f9c+((x)*4))
+#define  NVC0TCL_DEPTH_BOUNDS__SIZE                                                    0x00000002
+#define  NVC0TCL_MSAA_MASK(x)                                                          (0x00000fbc+((x)*4))
+#define  NVC0TCL_MSAA_MASK__SIZE                                                       0x00000004
+#define  NVC0TCL_CLIPID_ADDRESS_HIGH                                                   0x00000fcc
+#define  NVC0TCL_CLIPID_ADDRESS_LOW                                                    0x00000fd0
+#define  NVC0TCL_ZETA_ADDRESS_HIGH                                                     0x00000fe0
+#define  NVC0TCL_ZETA_ADDRESS_LOW                                                      0x00000fe4
+#define  NVC0TCL_ZETA_FORMAT                                                           0x00000fe8
+#define   NVC0TCL_ZETA_FORMAT_Z32_FLOAT                                                        0x0000000a
+#define   NVC0TCL_ZETA_FORMAT_Z16_UNORM                                                        0x00000013
+#define   NVC0TCL_ZETA_FORMAT_Z24S8_UNORM                                              0x00000014
+#define   NVC0TCL_ZETA_FORMAT_X8Z24_UNORM                                              0x00000015
+#define   NVC0TCL_ZETA_FORMAT_S8Z24_UNORM                                              0x00000016
+#define   NVC0TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM                                    0x00000019
+#define  NVC0TCL_ZETA_TILE_MODE                                                                0x00000fec
+#define  NVC0TCL_ZETA_LAYER_STRIDE                                                     0x00000ff0
+#define  NVC0TCL_SCREEN_SCISSOR_HORIZ                                                  0x00000ff4
+#define   NVC0TCL_SCREEN_SCISSOR_HORIZ_W_SHIFT                                         16
+#define   NVC0TCL_SCREEN_SCISSOR_HORIZ_W_MASK                                          0xffff0000
+#define   NVC0TCL_SCREEN_SCISSOR_HORIZ_X_SHIFT                                         0
+#define   NVC0TCL_SCREEN_SCISSOR_HORIZ_X_MASK                                          0x0000ffff
+#define  NVC0TCL_SCREEN_SCISSOR_VERT                                                   0x00000ff8
+#define   NVC0TCL_SCREEN_SCISSOR_VERT_H_SHIFT                                          16
+#define   NVC0TCL_SCREEN_SCISSOR_VERT_H_MASK                                           0xffff0000
+#define   NVC0TCL_SCREEN_SCISSOR_VERT_Y_SHIFT                                          0
+#define   NVC0TCL_SCREEN_SCISSOR_VERT_Y_MASK                                           0x0000ffff
+#define  NVC0TCL_VTX_ATTR_DEFINE                                                       0x0000114c
+#define   NVC0TCL_VTX_ATTR_DEFINE_ATTR_SHIFT                                           0
+#define   NVC0TCL_VTX_ATTR_DEFINE_ATTR_MASK                                            0x0000003f
+#define   NVC0TCL_VTX_ATTR_DEFINE_COMP_SHIFT                                           8
+#define   NVC0TCL_VTX_ATTR_DEFINE_COMP_MASK                                            0x00000f00
+#define   NVC0TCL_VTX_ATTR_DEFINE_SIZE_SHIFT                                           12
+#define   NVC0TCL_VTX_ATTR_DEFINE_SIZE_MASK                                            0x0000f000
+#define   NVC0TCL_VTX_ATTR_DEFINE_TYPE_SHIFT                                           16
+#define   NVC0TCL_VTX_ATTR_DEFINE_TYPE_MASK                                            0x000f0000
+#define    NVC0TCL_VTX_ATTR_DEFINE_TYPE_FLOAT                                          0x00070000
+#define    NVC0TCL_VTX_ATTR_DEFINE_TYPE_SNORM                                          0x00010000
+#define    NVC0TCL_VTX_ATTR_DEFINE_TYPE_UNORM                                          0x00020000
+#define    NVC0TCL_VTX_ATTR_DEFINE_TYPE_USCALED                                                0x00050000
+#define    NVC0TCL_VTX_ATTR_DEFINE_TYPE_SSCALED                                                0x00060000
+#define    NVC0TCL_VTX_ATTR_DEFINE_TYPE_UINT                                           0x00040000
+#define    NVC0TCL_VTX_ATTR_DEFINE_TYPE_SINT                                           0x00030000
+#define  NVC0TCL_VTX_ATTR_DATA(x)                                                      (0x00001150+((x)*4))
+#define  NVC0TCL_VTX_ATTR_DATA__SIZE                                                   0x00000004
+#define  NVC0TCL_VERTEX_ATTRIB_FORMAT(x)                                               (0x00001160+((x)*4))
+#define  NVC0TCL_VERTEX_ATTRIB_FORMAT__SIZE                                            0x00000020
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_BUFFER_SHIFT                                    0
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_BUFFER_MASK                                     0x0000003f
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_CONST                                           (1 <<  6)
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_OFFSET_SHIFT                                    7
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_OFFSET_MASK                                     0x001fff80
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_SHIFT                                    21
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_MASK                                     0x07e00000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_32_32_32_32                             0x00200000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_32_32_32                                        0x00400000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_16_16_16_16                             0x00600000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_32_32                                   0x00800000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_16_16_16                                        0x00a00000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_8_8_8_8                                 0x01400000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_16_16                                   0x01e00000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_32                                      0x02400000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_8_8_8                                   0x02600000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_8_8                                     0x03000000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_16                                      0x03600000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_8                                       0x03a00000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_2_10_10_10                              0x06000000
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_SHIFT                                      27
+#define   NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_MASK                                       0x78000000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT                                     0x38000000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_SNORM                                     0x08000000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_UNORM                                     0x10000000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_USCALED                                   0x28000000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED                                   0x30000000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_UINT                                      0x20000000
+#define    NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_SINT                                      0x18000000
+#define  NVC0TCL_RT_CONTROL                                                            0x0000121c /* one word: COUNT in bits 0-3, then eight 3-bit MAPn fields in bits 4-27 */
+#define   NVC0TCL_RT_CONTROL_COUNT_SHIFT                                               0
+#define   NVC0TCL_RT_CONTROL_COUNT_MASK                                                        0x0000000f
+#define   NVC0TCL_RT_CONTROL_MAP0_SHIFT                                                        4 /* MAPn starts at bit 4 + 3*n */
+#define   NVC0TCL_RT_CONTROL_MAP0_MASK                                                 0x00000070
+#define   NVC0TCL_RT_CONTROL_MAP1_SHIFT                                                        7
+#define   NVC0TCL_RT_CONTROL_MAP1_MASK                                                 0x00000380
+#define   NVC0TCL_RT_CONTROL_MAP2_SHIFT                                                        10
+#define   NVC0TCL_RT_CONTROL_MAP2_MASK                                                 0x00001c00
+#define   NVC0TCL_RT_CONTROL_MAP3_SHIFT                                                        13
+#define   NVC0TCL_RT_CONTROL_MAP3_MASK                                                 0x0000e000
+#define   NVC0TCL_RT_CONTROL_MAP4_SHIFT                                                        16
+#define   NVC0TCL_RT_CONTROL_MAP4_MASK                                                 0x00070000
+#define   NVC0TCL_RT_CONTROL_MAP5_SHIFT                                                        19
+#define   NVC0TCL_RT_CONTROL_MAP5_MASK                                                 0x00380000
+#define   NVC0TCL_RT_CONTROL_MAP6_SHIFT                                                        22
+#define   NVC0TCL_RT_CONTROL_MAP6_MASK                                                 0x01c00000
+#define   NVC0TCL_RT_CONTROL_MAP7_SHIFT                                                        25
+#define   NVC0TCL_RT_CONTROL_MAP7_MASK                                                 0x0e000000 /* bits 25-27; bits 28-31 have no field defined here */
+#define  NVC0TCL_ZETA_HORIZ                                                            0x00001228 /* ZETA_HORIZ, ZETA_VERT, ZETA_ARRAY_MODE: three consecutive 4-byte offsets */
+#define  NVC0TCL_ZETA_VERT                                                             0x0000122c
+#define  NVC0TCL_ZETA_ARRAY_MODE                                                       0x00001230 /* only this entry has a field layout defined */
+#define   NVC0TCL_ZETA_ARRAY_MODE_LAYERS_SHIFT                                         0
+#define   NVC0TCL_ZETA_ARRAY_MODE_LAYERS_MASK                                          0x0000ffff /* LAYERS: low 16 bits */
+#define   NVC0TCL_ZETA_ARRAY_MODE_UNK                                                  (1 << 16) /* single flag directly above LAYERS; named UNK, meaning not given here */
+#define  NVC0TCL_LINKED_TSC                                                            0x00001234 /* LINKED_TSC .. ALPHA_TEST_ENABLE: plain offsets; only SHADE_MODEL lists values */
+#define  NVC0TCL_FP_RESULT_COUNT                                                       0x00001298
+#define  NVC0TCL_DEPTH_TEST_ENABLE                                                     0x000012cc
+#define  NVC0TCL_SHADE_MODEL                                                           0x000012d4 /* two enumerated values follow */
+#define   NVC0TCL_SHADE_MODEL_FLAT                                                     0x00001d00 /* numerically equal to OpenGL GL_FLAT */
+#define   NVC0TCL_SHADE_MODEL_SMOOTH                                                   0x00001d01 /* numerically equal to OpenGL GL_SMOOTH */
+#define  NVC0TCL_BLEND_INDEPENDENT                                                     0x000012e4
+#define  NVC0TCL_DEPTH_WRITE_ENABLE                                                    0x000012e8
+#define  NVC0TCL_ALPHA_TEST_ENABLE                                                     0x000012ec
+#define  NVC0TCL_PM_SET(x)                                                             (0x000012f0+((x)*4)) /* indexed entry: element x at 0x12f0 + 4*x */
+#define  NVC0TCL_PM_SET__SIZE                                                          0x00000004 /* presumably the element count (x in 0..3), not a byte size -- confirm */
+#define  NVC0TCL_VB_ELEMENT_U8_SETUP                                                   0x00001300 /* SETUP word: 2-bit OFFSET in bits 30-31, 30-bit COUNT in bits 0-29 */
+#define   NVC0TCL_VB_ELEMENT_U8_SETUP_OFFSET_SHIFT                                     30
+#define   NVC0TCL_VB_ELEMENT_U8_SETUP_OFFSET_MASK                                      0xc0000000
+#define   NVC0TCL_VB_ELEMENT_U8_SETUP_COUNT_SHIFT                                      0
+#define   NVC0TCL_VB_ELEMENT_U8_SETUP_COUNT_MASK                                       0x3fffffff
+#define  NVC0TCL_VB_ELEMENT_U8                                                         0x00001304 /* word split into four 8-bit fields I0..I3, I0 in the lowest byte */
+#define   NVC0TCL_VB_ELEMENT_U8_I0_SHIFT                                               0
+#define   NVC0TCL_VB_ELEMENT_U8_I0_MASK                                                        0x000000ff
+#define   NVC0TCL_VB_ELEMENT_U8_I1_SHIFT                                               8
+#define   NVC0TCL_VB_ELEMENT_U8_I1_MASK                                                        0x0000ff00
+#define   NVC0TCL_VB_ELEMENT_U8_I2_SHIFT                                               16
+#define   NVC0TCL_VB_ELEMENT_U8_I2_MASK                                                        0x00ff0000
+#define   NVC0TCL_VB_ELEMENT_U8_I3_SHIFT                                               24
+#define   NVC0TCL_VB_ELEMENT_U8_I3_MASK                                                        0xff000000
+#define  NVC0TCL_DEPTH_TEST_FUNC                                                       0x0000130c /* eight values 0x200-0x207, numerically equal to OpenGL GL_NEVER..GL_ALWAYS */
+#define   NVC0TCL_DEPTH_TEST_FUNC_NEVER                                                        0x00000200
+#define   NVC0TCL_DEPTH_TEST_FUNC_LESS                                                 0x00000201
+#define   NVC0TCL_DEPTH_TEST_FUNC_EQUAL                                                        0x00000202
+#define   NVC0TCL_DEPTH_TEST_FUNC_LEQUAL                                               0x00000203
+#define   NVC0TCL_DEPTH_TEST_FUNC_GREATER                                              0x00000204
+#define   NVC0TCL_DEPTH_TEST_FUNC_NOTEQUAL                                             0x00000205
+#define   NVC0TCL_DEPTH_TEST_FUNC_GEQUAL                                               0x00000206
+#define   NVC0TCL_DEPTH_TEST_FUNC_ALWAYS                                               0x00000207
+#define  NVC0TCL_ALPHA_TEST_REF                                                                0x00001310 /* no sub-fields defined; ALPHA_TEST_FUNC follows at +4 */
+#define  NVC0TCL_ALPHA_TEST_FUNC                                                       0x00001314 /* same eight values (0x200-0x207) as NVC0TCL_DEPTH_TEST_FUNC */
+#define   NVC0TCL_ALPHA_TEST_FUNC_NEVER                                                        0x00000200
+#define   NVC0TCL_ALPHA_TEST_FUNC_LESS                                                 0x00000201
+#define   NVC0TCL_ALPHA_TEST_FUNC_EQUAL                                                        0x00000202
+#define   NVC0TCL_ALPHA_TEST_FUNC_LEQUAL                                               0x00000203
+#define   NVC0TCL_ALPHA_TEST_FUNC_GREATER                                              0x00000204
+#define   NVC0TCL_ALPHA_TEST_FUNC_NOTEQUAL                                             0x00000205
+#define   NVC0TCL_ALPHA_TEST_FUNC_GEQUAL                                               0x00000206
+#define   NVC0TCL_ALPHA_TEST_FUNC_ALWAYS                                               0x00000207
+#define  NVC0TCL_BLEND_COLOR(x)                                                                (0x0000131c+((x)*4)) /* indexed entry: element x at 0x131c + 4*x */
+#define  NVC0TCL_BLEND_COLOR__SIZE                                                     0x00000004 /* presumably the element count (x in 0..3) -- confirm */
+#define  NVC0TCL_TIC_FLUSH                                                             0x00001330 /* TIC_FLUSH, TSC_FLUSH, TEX_CACHE_CTL: consecutive offsets, no fields defined */
+#define  NVC0TCL_TSC_FLUSH                                                             0x00001334
+#define  NVC0TCL_TEX_CACHE_CTL                                                         0x00001338
+#define  NVC0TCL_BLEND_EQUATION_RGB                                                    0x00001340 /* five values, numerically equal to the OpenGL blend-equation enumerants of the same names */
+#define   NVC0TCL_BLEND_EQUATION_RGB_FUNC_ADD                                          0x00008006
+#define   NVC0TCL_BLEND_EQUATION_RGB_MIN                                               0x00008007
+#define   NVC0TCL_BLEND_EQUATION_RGB_MAX                                               0x00008008
+#define   NVC0TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT                                     0x0000800a /* note the gap: 0x8009 has no name here */
+#define   NVC0TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT                             0x0000800b
+#define  NVC0TCL_BLEND_FUNC_SRC_RGB                                                    0x00001344 /* 19 factor values; the same value set is repeated for DST_RGB, SRC_ALPHA and DST_ALPHA */
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ZERO                                              0x00004000 /* ZERO..ONE_MINUS_CONSTANT_ALPHA equal the OpenGL blend-factor enumerants with 0x4000 OR-ed in */
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE                                               0x00004001
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR                                         0x00004300
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR                               0x00004301
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA                                         0x00004302
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA                               0x00004303
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA                                         0x00004304
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA                               0x00004305
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_DST_COLOR                                         0x00004306
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR                               0x00004307
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE                                        0x00004308
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR                                    0x0000c001
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR                          0x0000c002
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA                                    0x0000c003
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA                          0x0000c004
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR                                                0x0000c900 /* SRC1_* factors occupy 0xc900-0xc903 */
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR                              0x0000c901
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA                                                0x0000c902
+#define   NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA                              0x0000c903
+#define  NVC0TCL_BLEND_FUNC_DST_RGB                                                    0x00001348 /* same 19 factor values as NVC0TCL_BLEND_FUNC_SRC_RGB */
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ZERO                                              0x00004000
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE                                               0x00004001
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_SRC_COLOR                                         0x00004300
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR                               0x00004301
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA                                         0x00004302
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA                               0x00004303
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_DST_ALPHA                                         0x00004304
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA                               0x00004305
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_DST_COLOR                                         0x00004306
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR                               0x00004307
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE                                        0x00004308
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR                                    0x0000c001
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR                          0x0000c002
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA                                    0x0000c003
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA                          0x0000c004
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_SRC1_COLOR                                                0x0000c900 /* SRC1_* factors occupy 0xc900-0xc903 */
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_COLOR                              0x0000c901
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_SRC1_ALPHA                                                0x0000c902
+#define   NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_ALPHA                              0x0000c903
+#define  NVC0TCL_BLEND_EQUATION_ALPHA                                                  0x0000134c /* same five values as NVC0TCL_BLEND_EQUATION_RGB */
+#define   NVC0TCL_BLEND_EQUATION_ALPHA_FUNC_ADD                                                0x00008006
+#define   NVC0TCL_BLEND_EQUATION_ALPHA_MIN                                             0x00008007
+#define   NVC0TCL_BLEND_EQUATION_ALPHA_MAX                                             0x00008008
+#define   NVC0TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT                                   0x0000800a /* 0x8009 has no name here */
+#define   NVC0TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT                           0x0000800b
+#define  NVC0TCL_BLEND_FUNC_SRC_ALPHA                                                  0x00001350 /* same 19 factor values as the BLEND_FUNC_*_RGB entries */
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ZERO                                            0x00004000
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE                                             0x00004001
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR                                       0x00004300
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR                             0x00004301
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA                                       0x00004302
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA                             0x00004303
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA                                       0x00004304
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA                             0x00004305
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR                                       0x00004306
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR                             0x00004307
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE                              0x00004308
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR                                  0x0000c001
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR                                0x0000c002
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA                                  0x0000c003
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA                                0x0000c004
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC1_COLOR                                      0x0000c900 /* SRC1_* factors occupy 0xc900-0xc903 */
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_COLOR                            0x0000c901
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC1_ALPHA                                      0x0000c902
+#define   NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA                            0x0000c903
+#define  NVC0TCL_BLEND_FUNC_DST_ALPHA                                                  0x00001358 /* not contiguous with SRC_ALPHA (0x1350): 0x1354 has no define among the neighbouring entries */
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ZERO                                            0x00004000 /* same 19 factor values as the other BLEND_FUNC_* entries */
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE                                             0x00004001
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR                                       0x00004300
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR                             0x00004301
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA                                       0x00004302
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA                             0x00004303
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA                                       0x00004304
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA                             0x00004305
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR                                       0x00004306
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR                             0x00004307
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE                              0x00004308
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR                                  0x0000c001
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR                                0x0000c002
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA                                  0x0000c003
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA                                0x0000c004
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC1_COLOR                                      0x0000c900
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR                            0x0000c901
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA                                      0x0000c902
+#define   NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA                            0x0000c903
+#define  NVC0TCL_STENCIL_ENABLE                                                                0x00001380 /* no sub-fields defined; the STENCIL_FRONT_* entries follow from +4 */
+#define  NVC0TCL_STENCIL_FRONT_OP_FAIL                                                 0x00001384 /* eight operation values, numerically equal to the OpenGL stencil-op enumerants of the same names */
+#define   NVC0TCL_STENCIL_FRONT_OP_FAIL_ZERO                                           0x00000000
+#define   NVC0TCL_STENCIL_FRONT_OP_FAIL_INVERT                                         0x0000150a
+#define   NVC0TCL_STENCIL_FRONT_OP_FAIL_KEEP                                           0x00001e00
+#define   NVC0TCL_STENCIL_FRONT_OP_FAIL_REPLACE                                                0x00001e01
+#define   NVC0TCL_STENCIL_FRONT_OP_FAIL_INCR                                           0x00001e02
+#define   NVC0TCL_STENCIL_FRONT_OP_FAIL_DECR                                           0x00001e03
+#define   NVC0TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP                                      0x00008507
+#define   NVC0TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP                                      0x00008508
+#define  NVC0TCL_STENCIL_FRONT_OP_ZFAIL                                                        0x00001388 /* same eight operation values as NVC0TCL_STENCIL_FRONT_OP_FAIL */
+#define   NVC0TCL_STENCIL_FRONT_OP_ZFAIL_ZERO                                          0x00000000
+#define   NVC0TCL_STENCIL_FRONT_OP_ZFAIL_INVERT                                                0x0000150a
+#define   NVC0TCL_STENCIL_FRONT_OP_ZFAIL_KEEP                                          0x00001e00
+#define   NVC0TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE                                       0x00001e01
+#define   NVC0TCL_STENCIL_FRONT_OP_ZFAIL_INCR                                          0x00001e02
+#define   NVC0TCL_STENCIL_FRONT_OP_ZFAIL_DECR                                          0x00001e03
+#define   NVC0TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP                                     0x00008507
+#define   NVC0TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP                                     0x00008508
+#define  NVC0TCL_STENCIL_FRONT_OP_ZPASS                                                        0x0000138c /* same eight operation values as NVC0TCL_STENCIL_FRONT_OP_FAIL */
+#define   NVC0TCL_STENCIL_FRONT_OP_ZPASS_ZERO                                          0x00000000
+#define   NVC0TCL_STENCIL_FRONT_OP_ZPASS_INVERT                                                0x0000150a
+#define   NVC0TCL_STENCIL_FRONT_OP_ZPASS_KEEP                                          0x00001e00
+#define   NVC0TCL_STENCIL_FRONT_OP_ZPASS_REPLACE                                       0x00001e01
+#define   NVC0TCL_STENCIL_FRONT_OP_ZPASS_INCR                                          0x00001e02
+#define   NVC0TCL_STENCIL_FRONT_OP_ZPASS_DECR                                          0x00001e03
+#define   NVC0TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP                                     0x00008507
+#define   NVC0TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP                                     0x00008508
+#define  NVC0TCL_STENCIL_FRONT_FUNC_FUNC                                               0x00001390 /* eight comparison values 0x200-0x207, the same set as NVC0TCL_DEPTH_TEST_FUNC */
+#define   NVC0TCL_STENCIL_FRONT_FUNC_FUNC_NEVER                                                0x00000200
+#define   NVC0TCL_STENCIL_FRONT_FUNC_FUNC_LESS                                         0x00000201
+#define   NVC0TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL                                                0x00000202
+#define   NVC0TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL                                       0x00000203
+#define   NVC0TCL_STENCIL_FRONT_FUNC_FUNC_GREATER                                      0x00000204
+#define   NVC0TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL                                     0x00000205
+#define   NVC0TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL                                       0x00000206
+#define   NVC0TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS                                       0x00000207
+#define  NVC0TCL_STENCIL_FRONT_FUNC_REF                                                        0x00001394 /* FUNC_REF, MASK, FUNC_MASK: three consecutive offsets, no fields defined */
+#define  NVC0TCL_STENCIL_FRONT_MASK                                                    0x00001398
+#define  NVC0TCL_STENCIL_FRONT_FUNC_MASK                                               0x0000139c
+#define  NVC0TCL_FRAG_COLOR_CLAMP_EN                                                   0x000013a8 /* this run defines offsets only; no entry here has a field layout */
+#define  NVC0TCL_Y_ORIGIN_BOTTOM                                                       0x000013ac
+#define  NVC0TCL_LINE_WIDTH(x)                                                         (0x000013b0+((x)*4)) /* indexed entry: element x at 0x13b0 + 4*x */
+#define  NVC0TCL_LINE_WIDTH__SIZE                                                      0x00000002 /* smaller than the 4-byte stride, so presumably an element count (x in 0..1) -- confirm */
+#define  NVC0TCL_POINT_COORD_REPLACE_MAP(x)                                            (0x000013c0+((x)*4)) /* indexed entry: element x at 0x13c0 + 4*x */
+#define  NVC0TCL_POINT_COORD_REPLACE_MAP__SIZE                                         0x00000008 /* presumably an element count (x in 0..7) -- confirm */
+#define  NVC0TCL_GP_VERTEX_OUTPUT_COUNT                                                        0x00001420
+#define  NVC0TCL_FENCE                                                                 0x0000142c
+#define  NVC0TCL_VB_ELEMENT_BASE                                                       0x00001434
+#define  NVC0TCL_INSTANCE_BASE                                                         0x00001438
+#define  NVC0TCL_CODE_CB_FLUSH                                                         0x00001440
+#define  NVC0TCL_CLIPID_HEIGHT                                                         0x00001504
+#define  NVC0TCL_VP_CLIP_DISTANCE_ENABLE                                               0x00001510 /* bitmask word: flag n is bit n, for n = 0..7 */
+#define   NVC0TCL_VP_CLIP_DISTANCE_ENABLE_0                                            (1 <<  0)
+#define   NVC0TCL_VP_CLIP_DISTANCE_ENABLE_1                                            (1 <<  1)
+#define   NVC0TCL_VP_CLIP_DISTANCE_ENABLE_2                                            (1 <<  2)
+#define   NVC0TCL_VP_CLIP_DISTANCE_ENABLE_3                                            (1 <<  3)
+#define   NVC0TCL_VP_CLIP_DISTANCE_ENABLE_4                                            (1 <<  4)
+#define   NVC0TCL_VP_CLIP_DISTANCE_ENABLE_5                                            (1 <<  5)
+#define   NVC0TCL_VP_CLIP_DISTANCE_ENABLE_6                                            (1 <<  6)
+#define   NVC0TCL_VP_CLIP_DISTANCE_ENABLE_7                                            (1 <<  7)
+#define  NVC0TCL_SAMPLECNT_ENABLE                                                      0x00001514 /* SAMPLECNT_ENABLE .. ZETA_ENABLE: plain offsets, no fields defined */
+#define  NVC0TCL_POINT_SIZE                                                            0x00001518
+#define  NVC0TCL_POINT_SPRITE_ENABLE                                                   0x00001520
+#define  NVC0TCL_SAMPLECNT_RESET                                                       0x00001530
+#define  NVC0TCL_MULTISAMPLE_ZETA_ENABLE                                               0x00001534
+#define  NVC0TCL_ZETA_ENABLE                                                           0x00001538
+#define  NVC0TCL_MULTISAMPLE_CTRL                                                      0x0000153c /* two independent flags: bit 0 and bit 4 */
+#define   NVC0TCL_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE                                   (1 <<  0)
+#define   NVC0TCL_MULTISAMPLE_CTRL_ALPHA_TO_ONE                                                (1 <<  4)
+#define  NVC0TCL_NOPERSPECTIVE_BITMAP(x)                                               (0x00001540+((x)*4)) /* indexed entry: element x at 0x1540 + 4*x */
+#define  NVC0TCL_NOPERSPECTIVE_BITMAP__SIZE                                            0x00000004 /* presumably the element count (x in 0..3) -- confirm */
+#define  NVC0TCL_COND_ADDRESS_HIGH                                                     0x00001550 /* HIGH/LOW pair at consecutive offsets, followed by COND_MODE */
+#define  NVC0TCL_COND_ADDRESS_LOW                                                      0x00001554
+#define  NVC0TCL_COND_MODE                                                             0x00001558 /* five enumerated values, 0 through 4 */
+#define   NVC0TCL_COND_MODE_NEVER                                                      0x00000000
+#define   NVC0TCL_COND_MODE_ALWAYS                                                     0x00000001
+#define   NVC0TCL_COND_MODE_RES                                                                0x00000002
+#define   NVC0TCL_COND_MODE_NOT_RES_AND_NOT_ID                                         0x00000003
+#define   NVC0TCL_COND_MODE_RES_OR_ID                                                  0x00000004
+#define  NVC0TCL_TSC_ADDRESS_HIGH                                                      0x0000155c /* TSC triple: ADDRESS_HIGH, ADDRESS_LOW, LIMIT at consecutive offsets */
+#define  NVC0TCL_TSC_ADDRESS_LOW                                                       0x00001560
+#define  NVC0TCL_TSC_LIMIT                                                             0x00001564
+#define  NVC0TCL_POLYGON_OFFSET_FACTOR                                                 0x0000156c
+#define  NVC0TCL_LINE_SMOOTH_ENABLE                                                    0x00001570
+#define  NVC0TCL_TIC_ADDRESS_HIGH                                                      0x00001574 /* TIC triple laid out in the same HIGH/LOW/LIMIT order as TSC */
+#define  NVC0TCL_TIC_ADDRESS_LOW                                                       0x00001578
+#define  NVC0TCL_TIC_LIMIT                                                             0x0000157c
+#define  NVC0TCL_PM_CONTROL(x)                                                         (0x00001580+((x)*4)) /* indexed entry: element x at 0x1580 + 4*x */
+#define  NVC0TCL_PM_CONTROL__SIZE                                                      0x00000004 /* presumably the element count (x in 0..3) -- confirm */
+#define   NVC0TCL_PM_CONTROL_UNK0                                                      (1 <<  0) /* all three fields are named UNK*; their meaning is not given here */
+#define   NVC0TCL_PM_CONTROL_UNK1_SHIFT                                                        4
+#define   NVC0TCL_PM_CONTROL_UNK1_MASK                                                 0x00000070 /* bits 4-6; bits 1-3 and bit 7 have no field defined */
+#define   NVC0TCL_PM_CONTROL_UNK2_SHIFT                                                        8
+#define   NVC0TCL_PM_CONTROL_UNK2_MASK                                                 0xffffff00 /* bits 8-31 */
+#define  NVC0TCL_STENCIL_TWO_SIDE_ENABLE                                               0x00001594 /* no sub-fields defined; the STENCIL_BACK_* entries follow from +4 */
+#define  NVC0TCL_STENCIL_BACK_OP_FAIL                                                  0x00001598 /* same eight operation values as the NVC0TCL_STENCIL_FRONT_OP_* entries */
+#define   NVC0TCL_STENCIL_BACK_OP_FAIL_ZERO                                            0x00000000
+#define   NVC0TCL_STENCIL_BACK_OP_FAIL_INVERT                                          0x0000150a
+#define   NVC0TCL_STENCIL_BACK_OP_FAIL_KEEP                                            0x00001e00
+#define   NVC0TCL_STENCIL_BACK_OP_FAIL_REPLACE                                         0x00001e01
+#define   NVC0TCL_STENCIL_BACK_OP_FAIL_INCR                                            0x00001e02
+#define   NVC0TCL_STENCIL_BACK_OP_FAIL_DECR                                            0x00001e03
+#define   NVC0TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP                                       0x00008507
+#define   NVC0TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP                                       0x00008508
+#define  NVC0TCL_STENCIL_BACK_OP_ZFAIL                                                 0x0000159c /* same eight operation values as NVC0TCL_STENCIL_BACK_OP_FAIL */
+#define   NVC0TCL_STENCIL_BACK_OP_ZFAIL_ZERO                                           0x00000000
+#define   NVC0TCL_STENCIL_BACK_OP_ZFAIL_INVERT                                         0x0000150a
+#define   NVC0TCL_STENCIL_BACK_OP_ZFAIL_KEEP                                           0x00001e00
+#define   NVC0TCL_STENCIL_BACK_OP_ZFAIL_REPLACE                                                0x00001e01
+#define   NVC0TCL_STENCIL_BACK_OP_ZFAIL_INCR                                           0x00001e02
+#define   NVC0TCL_STENCIL_BACK_OP_ZFAIL_DECR                                           0x00001e03
+#define   NVC0TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP                                      0x00008507
+#define   NVC0TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP                                      0x00008508
+#define  NVC0TCL_STENCIL_BACK_OP_ZPASS                                                 0x000015a0 /* same eight operation values as NVC0TCL_STENCIL_BACK_OP_FAIL */
+#define   NVC0TCL_STENCIL_BACK_OP_ZPASS_ZERO                                           0x00000000
+#define   NVC0TCL_STENCIL_BACK_OP_ZPASS_INVERT                                         0x0000150a
+#define   NVC0TCL_STENCIL_BACK_OP_ZPASS_KEEP                                           0x00001e00
+#define   NVC0TCL_STENCIL_BACK_OP_ZPASS_REPLACE                                                0x00001e01
+#define   NVC0TCL_STENCIL_BACK_OP_ZPASS_INCR                                           0x00001e02
+#define   NVC0TCL_STENCIL_BACK_OP_ZPASS_DECR                                           0x00001e03
+#define   NVC0TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP                                      0x00008507
+#define   NVC0TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP                                      0x00008508
+#define  NVC0TCL_STENCIL_BACK_FUNC_FUNC                                                        0x000015a4 /* eight comparison values 0x200-0x207, the same set as NVC0TCL_STENCIL_FRONT_FUNC_FUNC */
+#define   NVC0TCL_STENCIL_BACK_FUNC_FUNC_NEVER                                         0x00000200
+#define   NVC0TCL_STENCIL_BACK_FUNC_FUNC_LESS                                          0x00000201
+#define   NVC0TCL_STENCIL_BACK_FUNC_FUNC_EQUAL                                         0x00000202
+#define   NVC0TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL                                                0x00000203
+#define   NVC0TCL_STENCIL_BACK_FUNC_FUNC_GREATER                                       0x00000204
+#define   NVC0TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL                                      0x00000205
+#define   NVC0TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL                                                0x00000206
+#define   NVC0TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS                                                0x00000207
+#define  NVC0TCL_MULTISAMPLE_COLOR_ENABLE                                              0x000015b4 /* MULTISAMPLE_COLOR_ENABLE, FRAMEBUFFER_SRGB, POLYGON_OFFSET_UNITS: consecutive offsets, no fields defined */
+#define  NVC0TCL_FRAMEBUFFER_SRGB                                                      0x000015b8
+#define  NVC0TCL_POLYGON_OFFSET_UNITS                                                  0x000015bc
+#define  NVC0TCL_GP_BUILTIN_RESULT_EN                                                  0x000015cc /* two flags: VPORT is bit 0, LAYER is bit 16 */
+#define   NVC0TCL_GP_BUILTIN_RESULT_EN_VPORT                                           (1 <<  0)
+#define   NVC0TCL_GP_BUILTIN_RESULT_EN_LAYER                                           (1 << 16)
+#define  NVC0TCL_MULTISAMPLE_MODE                                                      0x000015d0 /* seven enumerated values; the numbering is sparse (3 and 5-7 have no name here) */
+#define   NVC0TCL_MULTISAMPLE_MODE_1X                                                  0x00000000
+#define   NVC0TCL_MULTISAMPLE_MODE_2XMS                                                        0x00000001
+#define   NVC0TCL_MULTISAMPLE_MODE_4XMS                                                        0x00000002
+#define   NVC0TCL_MULTISAMPLE_MODE_8XMS                                                        0x00000004
+#define   NVC0TCL_MULTISAMPLE_MODE_4XMS_4XCS                                           0x00000008 /* the *_nXCS variants start at 8 */
+#define   NVC0TCL_MULTISAMPLE_MODE_4XMS_12XCS                                          0x00000009
+#define   NVC0TCL_MULTISAMPLE_MODE_8XMS_8XCS                                           0x0000000a
+#define  NVC0TCL_EDGEFLAG_ENABLE                                                       0x000015e4 /* no sub-fields defined */
+#define  NVC0TCL_VB_ELEMENT_U32                                                                0x000015e8 /* no sub-fields defined (contrast with the packed U16/U8 variants) */
+#define  NVC0TCL_VB_ELEMENT_U16_SETUP                                                  0x000015ec /* same layout as VB_ELEMENT_U8_SETUP: OFFSET in bits 30-31, COUNT in bits 0-29 */
+#define   NVC0TCL_VB_ELEMENT_U16_SETUP_OFFSET_SHIFT                                    30
+#define   NVC0TCL_VB_ELEMENT_U16_SETUP_OFFSET_MASK                                     0xc0000000
+#define   NVC0TCL_VB_ELEMENT_U16_SETUP_COUNT_SHIFT                                     0
+#define   NVC0TCL_VB_ELEMENT_U16_SETUP_COUNT_MASK                                      0x3fffffff
+#define  NVC0TCL_VB_ELEMENT_U16                                                                0x000015f0 /* word split into two 16-bit fields: I0 in the low half, I1 in the high half */
+#define   NVC0TCL_VB_ELEMENT_U16_I0_SHIFT                                              0
+#define   NVC0TCL_VB_ELEMENT_U16_I0_MASK                                               0x0000ffff
+#define   NVC0TCL_VB_ELEMENT_U16_I1_SHIFT                                              16
+#define   NVC0TCL_VB_ELEMENT_U16_I1_MASK                                               0xffff0000
+#define  NVC0TCL_VERTEX_BASE_HIGH                                                      0x000015f4
+#define  NVC0TCL_VERTEX_BASE_LOW                                                       0x000015f8
+#define  NVC0TCL_CODE_ADDRESS_HIGH                                                     0x00001608
+#define  NVC0TCL_CODE_ADDRESS_LOW                                                      0x0000160c
+#define  NVC0TCL_VERTEX_BEGIN                                                          0x00001618
+#define   NVC0TCL_VERTEX_BEGIN_MODE_SHIFT                                              0
+#define   NVC0TCL_VERTEX_BEGIN_MODE_MASK                                               0x0000000f
+#define    NVC0TCL_VERTEX_BEGIN_MODE_POINTS                                            0x00000000
+#define    NVC0TCL_VERTEX_BEGIN_MODE_LINES                                             0x00000001
+#define    NVC0TCL_VERTEX_BEGIN_MODE_LINE_LOOP                                         0x00000002
+#define    NVC0TCL_VERTEX_BEGIN_MODE_LINE_STRIP                                                0x00000003
+#define    NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLES                                         0x00000004
+#define    NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLE_STRIP                                    0x00000005
+#define    NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLE_FAN                                      0x00000006
+#define    NVC0TCL_VERTEX_BEGIN_MODE_QUADS                                             0x00000007
+#define    NVC0TCL_VERTEX_BEGIN_MODE_QUAD_STRIP                                                0x00000008
+#define    NVC0TCL_VERTEX_BEGIN_MODE_POLYGON                                           0x00000009
+#define    NVC0TCL_VERTEX_BEGIN_MODE_LINES_ADJACENCY                                   0x0000000a
+#define    NVC0TCL_VERTEX_BEGIN_MODE_LINE_STRIP_ADJACENCY                              0x0000000b
+#define    NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLES_ADJACENCY                               0x0000000c
+#define    NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLE_STRIP_ADJACENCY                          0x0000000d
+#define    NVC0TCL_VERTEX_BEGIN_MODE_PATCHES                                           0x0000000e
+#define   NVC0TCL_VERTEX_BEGIN_INSTANCE                                                        (1 << 26)
+#define  NVC0TCL_VERTEX_END                                                            0x00001614
+#define  NVC0TCL_VERTEX_DATA                                                           0x00001640
+#define  NVC0TCL_PRIM_RESTART_ENABLE                                                   0x00001644
+#define  NVC0TCL_PRIM_RESTART_INDEX                                                    0x00001648
+#define  NVC0TCL_POINT_SMOOTH_ENABLE                                                   0x00001658
+#define  NVC0TCL_POINT_SPRITE_CTRL                                                     0x00001660
+#define  NVC0TCL_LINE_STIPPLE_ENABLE                                                   0x0000166c
+#define  NVC0TCL_LINE_STIPPLE_PATTERN                                                  0x00001680
+#define  NVC0TCL_PROVOKING_VERTEX_LAST                                                 0x00001684
+#define  NVC0TCL_VERTEX_TWO_SIDE_ENABLE                                                        0x00001688
+#define  NVC0TCL_POLYGON_STIPPLE_ENABLE                                                        0x0000168c
+#define  NVC0TCL_POLYGON_STIPPLE_PATTERN(x)                                            (0x00001700+((x)*4))
+#define  NVC0TCL_POLYGON_STIPPLE_PATTERN__SIZE                                         0x00000020
+#define  NVC0TCL_UNK17BC_ADDRESS_HIGH                                                  0x000017bc
+#define  NVC0TCL_UNK17BC_ADDRESS_LOW                                                   0x000017c0
+#define  NVC0TCL_UNK17BC_LIMIT                                                         0x000017c4
+#define  NVC0TCL_VP_POINT_SIZE_EN                                                      0x00001910
+#define  NVC0TCL_CULL_FACE_ENABLE                                                      0x00001918
+#define  NVC0TCL_FRONT_FACE                                                            0x0000191c
+#define   NVC0TCL_FRONT_FACE_CW                                                                0x00000900
+#define   NVC0TCL_FRONT_FACE_CCW                                                       0x00000901
+#define  NVC0TCL_CULL_FACE                                                             0x00001920
+#define   NVC0TCL_CULL_FACE_FRONT                                                      0x00000404
+#define   NVC0TCL_CULL_FACE_BACK                                                       0x00000405
+#define   NVC0TCL_CULL_FACE_FRONT_AND_BACK                                             0x00000408
+#define  NVC0TCL_VIEWPORT_TRANSFORM_EN                                                 0x0000192c
+#define  NVC0TCL_VIEW_VOLUME_CLIP_CTRL                                                 0x0000193c
+#define  NVC0TCL_VIEWPORT_CLIP_RECTS_EN                                                        0x0000194c
+#define  NVC0TCL_VIEWPORT_CLIP_MODE                                                    0x00001950
+#define   NVC0TCL_VIEWPORT_CLIP_MODE_INCLUDE                                           0x00000000
+#define   NVC0TCL_VIEWPORT_CLIP_MODE_EXCLUDE                                           0x00000001
+#define   NVC0TCL_VIEWPORT_CLIP_MODE_UNKNOWN                                           0x00000002
+#define  NVC0TCL_FP_ZORDER_CTRL                                                                0x0000196c
+#define  NVC0TCL_CLIPID_ENABLE                                                         0x0000197c
+#define  NVC0TCL_CLIPID_WIDTH                                                          0x00001980
+#define  NVC0TCL_CLIPID_ID                                                             0x00001984
+#define  NVC0TCL_REG_MODE                                                              0x000019a0
+#define   NVC0TCL_REG_MODE_PACKED                                                      0x00000001
+#define   NVC0TCL_REG_MODE_STRIPED                                                     0x00000002
+#define  NVC0TCL_FP_CONTROL                                                            0x000019a8
+#define   NVC0TCL_FP_CONTROL_MULTIPLE_RESULTS                                          (1 <<  0)
+#define   NVC0TCL_FP_CONTROL_EXPORTS_Z                                                 (1 <<  8)
+#define   NVC0TCL_FP_CONTROL_USES_KIL                                                  (1 << 20)
+#define  NVC0TCL_DEPTH_BOUNDS_EN                                                       0x000019bc
+#define  NVC0TCL_LOGIC_OP_ENABLE                                                       0x000019c4
+#define  NVC0TCL_LOGIC_OP                                                              0x000019c8
+#define   NVC0TCL_LOGIC_OP_CLEAR                                                       0x00001500
+#define   NVC0TCL_LOGIC_OP_AND                                                         0x00001501
+#define   NVC0TCL_LOGIC_OP_AND_REVERSE                                                 0x00001502
+#define   NVC0TCL_LOGIC_OP_COPY                                                                0x00001503
+#define   NVC0TCL_LOGIC_OP_AND_INVERTED                                                        0x00001504
+#define   NVC0TCL_LOGIC_OP_NOOP                                                                0x00001505
+#define   NVC0TCL_LOGIC_OP_XOR                                                         0x00001506
+#define   NVC0TCL_LOGIC_OP_OR                                                          0x00001507
+#define   NVC0TCL_LOGIC_OP_NOR                                                         0x00001508
+#define   NVC0TCL_LOGIC_OP_EQUIV                                                       0x00001509
+#define   NVC0TCL_LOGIC_OP_INVERT                                                      0x0000150a
+#define   NVC0TCL_LOGIC_OP_OR_REVERSE                                                  0x0000150b
+#define   NVC0TCL_LOGIC_OP_COPY_INVERTED                                               0x0000150c
+#define   NVC0TCL_LOGIC_OP_OR_INVERTED                                                 0x0000150d
+#define   NVC0TCL_LOGIC_OP_NAND                                                                0x0000150e
+#define   NVC0TCL_LOGIC_OP_SET                                                         0x0000150f
+#define  NVC0TCL_CLEAR_BUFFERS                                                         0x000019d0
+#define   NVC0TCL_CLEAR_BUFFERS_Z                                                      (1 <<  0)
+#define   NVC0TCL_CLEAR_BUFFERS_S                                                      (1 <<  1)
+#define   NVC0TCL_CLEAR_BUFFERS_R                                                      (1 <<  2)
+#define   NVC0TCL_CLEAR_BUFFERS_G                                                      (1 <<  3)
+#define   NVC0TCL_CLEAR_BUFFERS_B                                                      (1 <<  4)
+#define   NVC0TCL_CLEAR_BUFFERS_A                                                      (1 <<  5)
+#define   NVC0TCL_CLEAR_BUFFERS_RT_SHIFT                                               6
+#define   NVC0TCL_CLEAR_BUFFERS_RT_MASK                                                        0x000003c0
+#define   NVC0TCL_CLEAR_BUFFERS_LAYER_SHIFT                                            10
+#define   NVC0TCL_CLEAR_BUFFERS_LAYER_MASK                                             0x0007fc00
+#define  NVC0TCL_COLOR_MASK(x)                                                         (0x00001a00+((x)*4))
+#define  NVC0TCL_COLOR_MASK__SIZE                                                      0x00000008
+#define   NVC0TCL_COLOR_MASK_R_SHIFT                                                   0
+#define   NVC0TCL_COLOR_MASK_R_MASK                                                    0x0000000f
+#define   NVC0TCL_COLOR_MASK_G_SHIFT                                                   4
+#define   NVC0TCL_COLOR_MASK_G_MASK                                                    0x000000f0
+#define   NVC0TCL_COLOR_MASK_B_SHIFT                                                   8
+#define   NVC0TCL_COLOR_MASK_B_MASK                                                    0x00000f00
+#define   NVC0TCL_COLOR_MASK_A_SHIFT                                                   12
+#define   NVC0TCL_COLOR_MASK_A_MASK                                                    0x0000f000
+#define  NVC0TCL_QUERY_ADDRESS_HIGH                                                    0x00001b00
+#define  NVC0TCL_QUERY_ADDRESS_LOW                                                     0x00001b04
+#define  NVC0TCL_QUERY_SEQUENCE                                                                0x00001b08
+#define  NVC0TCL_QUERY_GET                                                             0x00001b0c
+#define  NVC0TCL_VERTEX_ARRAY_FETCH(x)                                                 (0x00001c00+((x)*16))
+#define  NVC0TCL_VERTEX_ARRAY_FETCH__SIZE                                              0x00000020
+#define   NVC0TCL_VERTEX_ARRAY_FETCH_STRIDE_SHIFT                                      0
+#define   NVC0TCL_VERTEX_ARRAY_FETCH_STRIDE_MASK                                       0x00000fff
+#define   NVC0TCL_VERTEX_ARRAY_FETCH_ENABLE                                            (1 << 12)
+#define  NVC0TCL_BLEND_EQUATIONI_RGB(x)                                                        (0x00001e04+((x)*32))
+#define  NVC0TCL_BLEND_EQUATIONI_RGB__SIZE                                             0x00000008
+#define   NVC0TCL_BLEND_EQUATIONI_RGB_FUNC_ADD                                         0x00008006
+#define   NVC0TCL_BLEND_EQUATIONI_RGB_MIN                                              0x00008007
+#define   NVC0TCL_BLEND_EQUATIONI_RGB_MAX                                              0x00008008
+#define   NVC0TCL_BLEND_EQUATIONI_RGB_FUNC_SUBTRACT                                    0x0000800a
+#define   NVC0TCL_BLEND_EQUATIONI_RGB_FUNC_REVERSE_SUBTRACT                            0x0000800b
+#define  NVC0TCL_BLEND_FUNCI_SRC_RGB(x)                                                        (0x00001e08+((x)*32))
+#define  NVC0TCL_BLEND_FUNCI_SRC_RGB__SIZE                                             0x00000008
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ZERO                                             0x00004000
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE                                              0x00004001
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC_COLOR                                                0x00004300
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_SRC_COLOR                              0x00004301
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC_ALPHA                                                0x00004302
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_SRC_ALPHA                              0x00004303
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_DST_ALPHA                                                0x00004304
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_DST_ALPHA                              0x00004305
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_DST_COLOR                                                0x00004306
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_DST_COLOR                              0x00004307
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC_ALPHA_SATURATE                               0x00004308
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_CONSTANT_COLOR                                   0x0000c001
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_CONSTANT_COLOR                         0x0000c002
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_CONSTANT_ALPHA                                   0x0000c003
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA                         0x0000c004
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC1_COLOR                                       0x0000c900
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_SRC1_COLOR                             0x0000c901
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC1_ALPHA                                       0x0000c902
+#define   NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_SRC1_ALPHA                             0x0000c903
+#define  NVC0TCL_BLEND_FUNCI_DST_RGB(x)                                                        (0x00001e0c+((x)*32))
+#define  NVC0TCL_BLEND_FUNCI_DST_RGB__SIZE                                             0x00000008
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ZERO                                             0x00004000
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE                                              0x00004001
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_SRC_COLOR                                                0x00004300
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_SRC_COLOR                              0x00004301
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_SRC_ALPHA                                                0x00004302
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_SRC_ALPHA                              0x00004303
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_DST_ALPHA                                                0x00004304
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_DST_ALPHA                              0x00004305
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_DST_COLOR                                                0x00004306
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_DST_COLOR                              0x00004307
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_SRC_ALPHA_SATURATE                               0x00004308
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_CONSTANT_COLOR                                   0x0000c001
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_CONSTANT_COLOR                         0x0000c002
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_CONSTANT_ALPHA                                   0x0000c003
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_CONSTANT_ALPHA                         0x0000c004
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_SRC1_COLOR                                       0x0000c900
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_SRC1_COLOR                             0x0000c901
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_SRC1_ALPHA                                       0x0000c902
+#define   NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_SRC1_ALPHA                             0x0000c903
+#define  NVC0TCL_BLEND_EQUATIONI_ALPHA(x)                                              (0x00001e10+((x)*32))
+#define  NVC0TCL_BLEND_EQUATIONI_ALPHA__SIZE                                           0x00000008
+#define   NVC0TCL_BLEND_EQUATIONI_ALPHA_FUNC_ADD                                       0x00008006
+#define   NVC0TCL_BLEND_EQUATIONI_ALPHA_MIN                                            0x00008007
+#define   NVC0TCL_BLEND_EQUATIONI_ALPHA_MAX                                            0x00008008
+#define   NVC0TCL_BLEND_EQUATIONI_ALPHA_FUNC_SUBTRACT                                  0x0000800a
+#define   NVC0TCL_BLEND_EQUATIONI_ALPHA_FUNC_REVERSE_SUBTRACT                          0x0000800b
+#define  NVC0TCL_BLEND_FUNCI_SRC_ALPHA(x)                                              (0x00001e14+((x)*32))
+#define  NVC0TCL_BLEND_FUNCI_SRC_ALPHA__SIZE                                           0x00000008
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ZERO                                           0x00004000
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE                                            0x00004001
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC_COLOR                                      0x00004300
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_SRC_COLOR                            0x00004301
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC_ALPHA                                      0x00004302
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_SRC_ALPHA                            0x00004303
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_DST_ALPHA                                      0x00004304
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_DST_ALPHA                            0x00004305
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_DST_COLOR                                      0x00004306
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_DST_COLOR                            0x00004307
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC_ALPHA_SATURATE                             0x00004308
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_CONSTANT_COLOR                                 0x0000c001
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR                       0x0000c002
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_CONSTANT_ALPHA                                 0x0000c003
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA                       0x0000c004
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC1_COLOR                                     0x0000c900
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_SRC1_COLOR                           0x0000c901
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC1_ALPHA                                     0x0000c902
+#define   NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA                           0x0000c903
+#define  NVC0TCL_BLEND_FUNCI_DST_ALPHA(x)                                              (0x00001e18+((x)*32))
+#define  NVC0TCL_BLEND_FUNCI_DST_ALPHA__SIZE                                           0x00000008
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ZERO                                           0x00004000
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE                                            0x00004001
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC_COLOR                                      0x00004300
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_SRC_COLOR                            0x00004301
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC_ALPHA                                      0x00004302
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_SRC_ALPHA                            0x00004303
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_DST_ALPHA                                      0x00004304
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_DST_ALPHA                            0x00004305
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_DST_COLOR                                      0x00004306
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_DST_COLOR                            0x00004307
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC_ALPHA_SATURATE                             0x00004308
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_CONSTANT_COLOR                                 0x0000c001
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR                       0x0000c002
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_CONSTANT_ALPHA                                 0x0000c003
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA                       0x0000c004
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC1_COLOR                                     0x0000c900
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_SRC1_COLOR                           0x0000c901
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC1_ALPHA                                     0x0000c902
+#define   NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_SRC1_ALPHA                           0x0000c903
+#define  NVC0TCL_SP_SELECT(x)                                                          (0x00002000+((x)*64))
+#define  NVC0TCL_SP_SELECT__SIZE                                                       0x00000006
+#define   NVC0TCL_SP_SELECT_ENABLE                                                     (1 <<  0)
+#define   NVC0TCL_SP_SELECT_PROGRAM_SHIFT                                              4
+#define   NVC0TCL_SP_SELECT_PROGRAM_MASK                                               0x000000f0
+#define  NVC0TCL_SP_START_ID(x)                                                                (0x00002004+((x)*64))
+#define  NVC0TCL_SP_START_ID__SIZE                                                     0x00000006
+#define  NVC0TCL_SP_GPR_ALLOC(x)                                                       (0x0000200c+((x)*64))
+#define  NVC0TCL_SP_GPR_ALLOC__SIZE                                                    0x00000006
+#define  NVC0TCL_CB_SIZE                                                               0x00002380
+#define  NVC0TCL_CB_BIND(x)                                                            (0x00002410+((x)*32))
+#define  NVC0TCL_CB_BIND__SIZE                                                         0x00000005
+#define   NVC0TCL_CB_BIND_VALID                                                                (1 <<  0)
+#define   NVC0TCL_CB_BIND_INDEX_SHIFT                                                  4
+#define   NVC0TCL_CB_BIND_INDEX_MASK                                                   0x000000f0
+#define  NVC0TCL_BIND_TIC(x)                                                           (0x00002404+((x)*32))
+#define  NVC0TCL_BIND_TIC__SIZE                                                                0x00000005
+#define   NVC0TCL_BIND_TIC_ACTIVE                                                      (1 <<  0)
+#define   NVC0TCL_BIND_TIC_TEXTURE_SHIFT                                               1
+#define   NVC0TCL_BIND_TIC_TEXTURE_MASK                                                        0x000001fe
+#define   NVC0TCL_BIND_TIC_TIC_SHIFT                                                   9
+#define   NVC0TCL_BIND_TIC_TIC_MASK                                                    0x7ffffe00
+#define  NVC0TCL_TEX_LIMITS(x)                                                         (0x00002200+((x)*16))
+#define  NVC0TCL_TEX_LIMITS__SIZE                                                      0x00000005
+#define   NVC0TCL_TEX_LIMITS_SAMPLERS_LOG2_SHIFT                                       0
+#define   NVC0TCL_TEX_LIMITS_SAMPLERS_LOG2_MASK                                                0x0000000f
+#define   NVC0TCL_TEX_LIMITS_TEXTURES_LOG2_SHIFT                                       4
+#define   NVC0TCL_TEX_LIMITS_TEXTURES_LOG2_MASK                                                0x000000f0
+#define  NVC0TCL_CB_ADDR_HIGH                                                          0x00002384
+#define  NVC0TCL_CB_ADDR_LOW                                                           0x00002388
+#define  NVC0TCL_CB_POS                                                                        0x0000238c
+#define  NVC0TCL_CB_DATA(x)                                                            (0x00002390+((x)*4))
+#define  NVC0TCL_CB_DATA__SIZE                                                         0x00000010
+#define  NVC0TCL_TFB_VARYING_LOCS(x)                                                   (0x00002800+((x)*4))
+#define  NVC0TCL_TFB_VARYING_LOCS__SIZE                                                        0x00000080
+#define  NVC0TCL_UNK_UPLOAD_POS                                                                0x00003800
+#define  NVC0TCL_UNK_UPLOAD_DATA                                                       0x00003804
+#define  NVC0TCL_VERTEX_ARRAY_SELECT                                                   0x00003820
+#define  NVC0TCL_VERTEX_ARRAY_ADDRESS                                                  0x00003824
+#define  NVC0TCL_BLEND_ENABLEI                                                         0x00003858
+#define  NVC0TCL_POLYGON_MODE_FRONT                                                    0x00003868
+#define   NVC0TCL_POLYGON_MODE_FRONT_POINT                                             0x00001b00
+#define   NVC0TCL_POLYGON_MODE_FRONT_LINE                                              0x00001b01
+#define   NVC0TCL_POLYGON_MODE_FRONT_FILL                                              0x00001b02
+#define  NVC0TCL_POLYGON_MODE_BACK                                                     0x00003870
+#define   NVC0TCL_POLYGON_MODE_BACK_POINT                                              0x00001b00
+#define   NVC0TCL_POLYGON_MODE_BACK_LINE                                               0x00001b01
+#define   NVC0TCL_POLYGON_MODE_BACK_FILL                                               0x00001b02
+#define  NVC0TCL_GP_SELECT                                                             0x00003878
+#define   NVC0TCL_GP_SELECT_ENABLE                                                     (1 <<  0)
+#define   NVC0TCL_GP_SELECT_PROGRAM_SHIFT                                              4
+#define   NVC0TCL_GP_SELECT_PROGRAM_MASK                                               0x000000f0
+#define  NVC0TCL_TEP_SELECT                                                            0x00003880
+#define   NVC0TCL_TEP_SELECT_ENABLE                                                    (1 <<  0)
+#define   NVC0TCL_TEP_SELECT_PROGRAM_SHIFT                                             4
+#define   NVC0TCL_TEP_SELECT_PROGRAM_MASK                                              0x000000f0
+
+
+#define NVC0_COMPUTE                                                                   0x000090c0
+
+#define  NVC0_COMPUTE_NOP                                                              0x00000100
+#define  NVC0_COMPUTE_NOTIFY                                                           0x00000104
+#define  NVC0_COMPUTE_SERIALIZE                                                                0x00000110
+#define  NVC0_COMPUTE_LOCAL_SIZE                                                       0x00000204
+#define  NVC0_COMPUTE_SHARED_BASE                                                      0x00000214
+#define  NVC0_COMPUTE_GRIDDIM_YX                                                       0x00000238
+#define   NVC0_COMPUTE_GRIDDIM_YX_X_SHIFT                                              0
+#define   NVC0_COMPUTE_GRIDDIM_YX_X_MASK                                               0x0000ffff
+#define   NVC0_COMPUTE_GRIDDIM_YX_Y_SHIFT                                              16
+#define   NVC0_COMPUTE_GRIDDIM_YX_Y_MASK                                               0xffff0000
+#define  NVC0_COMPUTE_GRIDDIM_Z                                                                0x0000023c
+#define  NVC0_COMPUTE_SHARED_SIZE                                                      0x0000024c
+#define  NVC0_COMPUTE_BLOCK_ALLOC                                                      0x00000250
+#define   NVC0_COMPUTE_BLOCK_ALLOC_THREADS_SHIFT                                       0
+#define   NVC0_COMPUTE_BLOCK_ALLOC_THREADS_MASK                                                0x0000ffff
+#define   NVC0_COMPUTE_BLOCK_ALLOC_BARRIERS_SHIFT                                      16
+#define   NVC0_COMPUTE_BLOCK_ALLOC_BARRIERS_MASK                                       0xffff0000
+#define  NVC0_COMPUTE_CP_GPR_ALLOC                                                     0x000002c0
+#define  NVC0_COMPUTE_GLOBAL_BASE                                                      0x000002c8
+#define   NVC0_COMPUTE_GLOBAL_BASE_HIGH_SHIFT                                          0
+#define   NVC0_COMPUTE_GLOBAL_BASE_HIGH_MASK                                           0x000000ff
+#define   NVC0_COMPUTE_GLOBAL_BASE_INDEX_SHIFT                                         16
+#define   NVC0_COMPUTE_GLOBAL_BASE_INDEX_MASK                                          0x00ff0000
+#define   NVC0_COMPUTE_GLOBAL_BASE_FLAGS_SHIFT                                         28
+#define   NVC0_COMPUTE_GLOBAL_BASE_FLAGS_MASK                                          0xf0000000
+#define  NVC0_COMPUTE_LAUNCH                                                           0x00000368
+#define  NVC0_COMPUTE_BLOCKDIM_YX                                                      0x000003ac
+#define   NVC0_COMPUTE_BLOCKDIM_YX_X_SHIFT                                             0
+#define   NVC0_COMPUTE_BLOCKDIM_YX_X_MASK                                              0x0000ffff
+#define   NVC0_COMPUTE_BLOCKDIM_YX_Y_SHIFT                                             16
+#define   NVC0_COMPUTE_BLOCKDIM_YX_Y_MASK                                              0xffff0000
+#define  NVC0_COMPUTE_BLOCKDIM_Z                                                       0x000003b0
+#define  NVC0_COMPUTE_CP_START_ID                                                      0x000003b4
+#define  NVC0_COMPUTE_LOCAL_BASE                                                       0x0000077c
+#define  NVC0_COMPUTE_UNK0790_ADDRESS_HIGH                                             0x00000790
+#define  NVC0_COMPUTE_UNK0790_ADDRESS_LOW                                              0x00000794
+#define  NVC0_COMPUTE_LINKED_TSC                                                       0x00001234
+#define  NVC0_COMPUTE_TSC_ADDRESS_HIGH                                                 0x0000155c
+#define  NVC0_COMPUTE_TSC_ADDRESS_LOW                                                  0x00001560
+#define  NVC0_COMPUTE_TSC_LIMIT                                                                0x00001564
+#define  NVC0_COMPUTE_TIC_ADDRESS_HIGH                                                 0x00001574
+#define  NVC0_COMPUTE_TIC_ADDRESS_LOW                                                  0x00001578
+#define  NVC0_COMPUTE_TIC_LIMIT                                                                0x0000157c
+#define  NVC0_COMPUTE_CODE_ADDRESS_HIGH                                                        0x00001608
+#define  NVC0_COMPUTE_CODE_ADDRESS_LOW                                                 0x0000160c
+#define  NVC0_COMPUTE_CB_BIND                                                          0x00001694
+#define   NVC0_COMPUTE_CB_BIND_INDEX_SHIFT                                             1
+#define   NVC0_COMPUTE_CB_BIND_INDEX_MASK                                              0xfffffffe
+#define   NVC0_COMPUTE_CB_BIND_VALID                                                   (1 <<  0)
+#define  NVC0_COMPUTE_QUERY_ADDRESS_HIGH                                               0x00001b00
+#define  NVC0_COMPUTE_QUERY_ADDRESS_LOW                                                        0x00001b04
+#define  NVC0_COMPUTE_QUERY_SEQUENCE                                                   0x00001b08
+#define  NVC0_COMPUTE_QUERY_GET                                                                0x00001b0c
+#define  NVC0_COMPUTE_CB_ADDRESS_HIGH                                                  0x00002384
+#define  NVC0_COMPUTE_CB_ADDRESS_LOW                                                   0x00002388
+#define  NVC0_COMPUTE_CB_POS                                                           0x0000238c
+#define  NVC0_COMPUTE_CB_DATA                                                          0x00002390
+
+
 #endif /* NOUVEAU_REG_H */
index e31e6f8662a15371fc7a5d04ba892b8a3cf59dd6..bf1e8201a08534cb6b7b9d9cfd4186752075330c 100644 (file)
@@ -8,9 +8,9 @@ C_SOURCES = \
        nv50_clear.c \
        nv50_context.c \
        nv50_draw.c \
+       nv50_formats.c \
        nv50_miptree.c \
        nv50_query.c \
-       nv50_program.c \
        nv50_resource.c \
        nv50_screen.c \
        nv50_state.c \
@@ -19,6 +19,14 @@ C_SOURCES = \
        nv50_tex.c \
        nv50_transfer.c \
        nv50_vbo.c \
-       nv50_push.c
+       nv50_push.c \
+       nv50_program.c \
+       nv50_shader_state.c \
+       nv50_pc.c \
+       nv50_pc_print.c \
+       nv50_pc_emit.c \
+       nv50_tgsi_to_nc.c \
+       nv50_pc_optimize.c \
+       nv50_pc_regalloc.c
 
 include ../../Makefile.template
index 8625f926221845853225913c59e89f82c375750c..e4a93c15ce4459b03727f49b87d0a3d0b56f5ea2 100644 (file)
@@ -9,6 +9,7 @@ nv50 = env.ConvenienceLibrary(
         'nv50_clear.c',
         'nv50_context.c',
         'nv50_draw.c',
+        'nv50_formats.c',
         'nv50_miptree.c',
         'nv50_query.c',
         'nv50_program.c',
diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c
new file mode 100644 (file)
index 0000000..e1c7dae
--- /dev/null
@@ -0,0 +1,452 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_screen.h"
+#include "nv50_texture.h"
+#include "nouveau/nouveau_class.h"
+#include "pipe/p_defines.h"
+
+/* A_() expands to two comma-separated initializer values for formats that
+ * may also be used for vertex fetch:
+ *  - the texture word (NV50TIC_0_0_* bits): per-channel source mapping
+ *    (cr..ca), per-channel type (t0..t3) and storage layout (sz);
+ *  - the vertex attribute word: layout (sz), type (t0) and flag r in bit 31.
+ * NOTE(review): every table entry passing r = 1 has BGR component order, so
+ * r is presumably a red/blue swap flag - confirm against the hardware docs.
+ */
+#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r)          \
+   NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 |         \
+   NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 |        \
+   NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 |        \
+   NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 |         \
+   NV50TIC_0_0_FMT_##sz,                                    \
+   NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_##sz |                \
+   NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 |                  \
+   (NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 << 3) |           \
+   ((unsigned)(r) << 31) /* unsigned: 1 << 31 overflows a signed int */
+
+/* B_() is the texture-only variant: same texture word as A_(), but the
+ * vertex attribute word is 0 and the r argument is ignored.
+ */
+#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r)   \
+   NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
+   NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
+   NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
+   NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
+   NV50TIC_0_0_FMT_##sz, 0
+
+/* short names for the bind flags used in the usage column of the table */
+#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
+#define SAMPLER_VIEW  PIPE_BIND_SAMPLER_VIEW
+#define RENDER_TARGET PIPE_BIND_RENDER_TARGET
+#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL
+#define SCANOUT       PIPE_BIND_SCANOUT
+
+/* for vertex buffers: A_() pastes the same sz token into both the texture
+ * and the vertex layout name, so the 3-component layouts need a texture-side
+ * name as well; they alias the 4-component texture layouts.
+ */
+#define NV50TIC_0_0_FMT_8_8_8 NV50TIC_0_0_FMT_8_8_8_8
+#define NV50TIC_0_0_FMT_16_16_16 NV50TIC_0_0_FMT_16_16_16_16
+#define NV50TIC_0_0_FMT_32_32_32 NV50TIC_0_0_FMT_32_32_32_32
+
+const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
+{
+   /* COMMON FORMATS */
+
+   [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50TCL_RT_FORMAT_A8R8G8B8_UNORM,
+    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+   [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50TCL_RT_FORMAT_X8R8G8B8_UNORM,
+    A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+   [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50TCL_RT_FORMAT_A8R8G8B8_SRGB,
+    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50TCL_RT_FORMAT_X8R8G8B8_SRGB,
+    A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_B5G6R5_UNORM] = { NV50TCL_RT_FORMAT_R5G6B5_UNORM,
+    B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
+    SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+   [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50TCL_RT_FORMAT_A1R5G5B5_UNORM,
+    B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
+    SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+   [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0,
+    B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50TCL_RT_FORMAT_A2B10G10R10_UNORM,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
+    SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
+
+   [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50TCL_RT_FORMAT_A2R10G10B10_UNORM,
+    A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
+    SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
+
+   /* DEPTH/STENCIL FORMATS */
+
+   [PIPE_FORMAT_Z16_UNORM] = { NV50TCL_ZETA_FORMAT_Z16_UNORM,
+    B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_DEPTH, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
+    B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_Z24X8_UNORM] = { NV50TCL_ZETA_FORMAT_X8Z24_UNORM,
+    B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
+    B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_Z32_FLOAT] = { NV50TCL_ZETA_FORMAT_Z32_FLOAT,
+    B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_DEPTH, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
+    NV50TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
+    B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
+    SAMPLER_VIEW | DEPTH_STENCIL },
+
+   /* LUMINANCE, ALPHA, INTENSITY */
+
+   [PIPE_FORMAT_L8_UNORM] = { 0,
+    A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_L8_SRGB] = { 0,
+    A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_I8_UNORM] = { 0,
+    A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_A8_UNORM] = { NV50TCL_RT_FORMAT_A8_UNORM,
+    A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_L8A8_UNORM] = { 0,
+    A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_L8A8_SRGB] = { 0,
+    A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+    SAMPLER_VIEW },
+
+   /* DXT, RGTC */
+
+   [PIPE_FORMAT_DXT1_RGB] = { 0,
+    B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_DXT1_RGBA] = { 0,
+    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_DXT3_RGBA] = { 0,
+    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_DXT5_RGBA] = { 0,
+    B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_RGTC1_UNORM] = { 0,
+    B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_RGTC1_SNORM] = { 0,
+    B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_RGTC2_UNORM] = { 0,
+    B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
+    SAMPLER_VIEW },
+
+   [PIPE_FORMAT_RGTC2_SNORM] = { 0,
+    B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
+    SAMPLER_VIEW },
+
+   /* FLOAT 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16A16_FLOAT,
+    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16X16_FLOAT,
+    A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16G16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16_FLOAT,
+    A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16_FLOAT] = { NV50TCL_RT_FORMAT_R16_FLOAT,
+    A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* FLOAT 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT,
+    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32X32_FLOAT,
+    A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R32G32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32_FLOAT,
+    A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R32_FLOAT] = { NV50TCL_RT_FORMAT_R32_FLOAT,
+    A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* ODD FORMATS */
+
+   [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50TCL_RT_FORMAT_B10G11R11_FLOAT,
+    B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
+    SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
+    B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 5_9_9_9, 0),
+    SAMPLER_VIEW },
+
+   /* SNORM 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
+    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32B32_SNORM] = { 0,
+    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32_SNORM] = { 0,
+    A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32_SNORM] = { 0,
+    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* UNORM 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
+    A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32B32_UNORM] = { 0,
+    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32_UNORM] = { 0,
+    A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32_UNORM] = { 0,
+    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* SNORM 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_SNORM,
+    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16G16B16_SNORM] = { 0,
+    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16_SNORM] = { NV50TCL_RT_FORMAT_R16G16_SNORM,
+    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16_SNORM] = { 0,
+    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* UNORM 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_UNORM,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16G16B16_UNORM] = { 0,
+    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16_UNORM] = { NV50TCL_RT_FORMAT_R16G16_UNORM,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R16_UNORM] = { 0,
+    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* SNORM 8 */
+
+   [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_SNORM,
+    A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8B8_SNORM] = { 0,
+    A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8_SNORM] = { NV50TCL_RT_FORMAT_R8G8_SNORM,
+    A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8_SNORM] = { NV50TCL_RT_FORMAT_R8_SNORM,
+    A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* UNORM 8 */
+
+   [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_UNORM,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50TCL_RT_FORMAT_A8B8G8R8_SRGB,
+    A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+    SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8B8_UNORM] = { NV50TCL_RT_FORMAT_X8B8G8R8_UNORM,
+    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8B8_SRGB] = { NV50TCL_RT_FORMAT_X8B8G8R8_SRGB,
+    A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+    SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8G8_UNORM] = { NV50TCL_RT_FORMAT_R8G8_UNORM,
+    A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   [PIPE_FORMAT_R8_UNORM] = { NV50TCL_RT_FORMAT_R8_UNORM,
+    A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+   /* SSCALED 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_SSCALED] = { 0,
+    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
+    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32_SSCALED] = { 0,
+    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32_SSCALED] = { 0,
+    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* USCALED 32 */
+
+   [PIPE_FORMAT_R32G32B32A32_USCALED] = { 0,
+    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32B32_USCALED] = { 0,
+    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32G32_USCALED] = { 0,
+    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R32_USCALED] = { 0,
+    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* SSCALED 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_SSCALED] = { 0,
+    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
+    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16_SSCALED] = { 0,
+    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16_SSCALED] = { 0,
+    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* USCALED 16 */
+
+   [PIPE_FORMAT_R16G16B16A16_USCALED] = { 0,
+    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16B16_USCALED] = { 0,
+    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16G16_USCALED] = { 0,
+    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R16_USCALED] = { 0,
+    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* SSCALED 8 */
+
+   [PIPE_FORMAT_R8G8B8A8_SSCALED] = { 0,
+    A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
+    A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8_SSCALED] = { 0,
+    A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8_SSCALED] = { 0,
+    A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   /* USCALED 8 */
+
+   [PIPE_FORMAT_R8G8B8A8_USCALED] = { 0,
+    A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8B8_USCALED] = { 0,
+    A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8G8_USCALED] = { 0,
+    A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+
+   [PIPE_FORMAT_R8_USCALED] = { 0,
+    A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
+    VERTEX_BUFFER | SAMPLER_VIEW },
+};
index c0f5cc10dd7d4dcf4743947191042f07cb4aff43..dd0e8fd41b1b4a238853bc1ab804770397106602 100644 (file)
@@ -159,6 +159,9 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *tmp
        case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
                tile_flags = 0x2800;
                break;
+       case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+               tile_flags = 0xe000;
+               break;
        case PIPE_FORMAT_R32G32B32A32_FLOAT:
        case PIPE_FORMAT_R32G32B32_FLOAT:
                tile_flags = 0x7400;
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
new file mode 100644 (file)
index 0000000..e34c055
--- /dev/null
@@ -0,0 +1,677 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* #define NV50PC_DEBUG */
+
+#include "nv50_pc.h"
+#include "nv50_program.h"
+
+#include <stdio.h>
+
+/* Tells whether source operands 0 and 1 of an instruction with the given
+ * opcode may be exchanged: TRUE for the opcodes listed, FALSE otherwise.
+ */
+boolean
+nv_op_commutative(uint opcode)
+{
+   boolean swappable = FALSE;
+
+   switch (opcode) {
+   case NV_OP_ADD:
+   case NV_OP_MUL:
+   case NV_OP_MAD:
+   case NV_OP_AND:
+   case NV_OP_OR:
+   case NV_OP_XOR:
+   case NV_OP_MIN:
+   case NV_OP_MAX:
+   case NV_OP_SAD:
+      swappable = TRUE;
+      break;
+   default:
+      break;
+   }
+   return swappable;
+}
+
+/* Returns the index of the source operand the address register applies to:
+ * -1 if source slot 4 is empty, 0 for MOV and LDA, 1 for every other opcode.
+ */
+int
+nv50_indirect_opnd(struct nv_instruction *i)
+{
+   if (i->src[4] == NULL)
+      return -1;
+
+   if (i->opcode == NV_OP_MOV || i->opcode == NV_OP_LDA)
+      return 0;
+   return 1;
+}
+
+/* Tells whether source slot @s of @nvi may hold an immediate.
+ *
+ * Never allowed when the instruction reads or writes flags.  For the listed
+ * two-operand opcodes only slot 1 qualifies, and only if source 0 and the
+ * first result are both in the GPR file.  For MOV (slot 0 only) the result
+ * must be in the GPR file.
+ */
+boolean
+nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s)
+{
+   if (nvi->flags_src || nvi->flags_def)
+      return FALSE;
+
+   switch (nvi->opcode) {
+   case NV_OP_MOV:
+      assert(s == 0);
+      return (nvi->def[0]->reg.file == NV_FILE_GPR);
+   case NV_OP_ADD:
+   case NV_OP_MUL:
+   case NV_OP_AND:
+   case NV_OP_OR:
+   case NV_OP_XOR:
+   case NV_OP_SHL:
+   case NV_OP_SHR:
+      if (s != 1)
+         return FALSE;
+      if (nvi->src[0]->value->reg.file != NV_FILE_GPR)
+         return FALSE;
+      return (nvi->def[0]->reg.file == NV_FILE_GPR);
+   default:
+      break;
+   }
+   return FALSE;
+}
+
+/* Tells whether source slot @s of @nvi may read @value directly from memory.
+ *
+ * Refused if any of the first three sources is an immediate.  For the listed
+ * arithmetic opcodes:
+ *   slot 0 accepts the MEM_S and MEM_P files,
+ *   slot 1 accepts the MEM_C(0)..MEM_C(15) files,
+ *   slot 2 is accepted when source 1 is in the GPR file.
+ * MOV (slot 0 only) and all other opcodes are refused.
+ */
+boolean
+nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
+{
+   int k = 0;
+
+   while (k < 3 && nvi->src[k]) {
+      if (nvi->src[k]->value->reg.file == NV_FILE_IMM)
+         return FALSE;
+      ++k;
+   }
+
+   switch (nvi->opcode) {
+   case NV_OP_ABS:
+   case NV_OP_ADD:
+   case NV_OP_CEIL:
+   case NV_OP_FLOOR:
+   case NV_OP_TRUNC:
+   case NV_OP_CVT:
+   case NV_OP_MAD:
+   case NV_OP_MUL:
+   case NV_OP_SAT:
+   case NV_OP_SUB:
+   case NV_OP_MAX:
+   case NV_OP_MIN:
+      switch (s) {
+      case 0:
+         return (value->reg.file == NV_FILE_MEM_S ||
+                 value->reg.file == NV_FILE_MEM_P);
+      case 1:
+         return (value->reg.file >= NV_FILE_MEM_C(0) &&
+                 value->reg.file <= NV_FILE_MEM_C(15));
+      case 2:
+         return (nvi->src[1]->value->reg.file == NV_FILE_GPR);
+      default:
+         return FALSE;
+      }
+   case NV_OP_MOV:
+      assert(s == 0);
+      return /* TRUE */ FALSE; /* don't turn MOVs into loads */
+   default:
+      return FALSE;
+   }
+}
+
+/* Tells whether @nvi can be executed conditionally: it must not already
+ * read flags, and none of its first four sources may be an immediate.
+ */
+boolean
+nv50_nvi_can_predicate(struct nv_instruction *nvi)
+{
+   int s = 0;
+
+   if (nvi->flags_src)
+      return FALSE;
+
+   while (s < 4 && nvi->src[s]) {
+      if (nvi->src[s]->value->reg.file == NV_FILE_IMM)
+         return FALSE;
+      ++s;
+   }
+   return TRUE;
+}
+
+/* Returns the mask of NV_MOD_* source modifiers accepted by @opcode.
+ * @s is the source slot; it is only checked (must be 0) for DFDX/DFDY.
+ */
+ubyte
+nv50_supported_src_mods(uint opcode, int s)
+{
+   ubyte mods = 0;
+
+   switch (opcode) {
+   case NV_OP_ABS: /* obviously */
+   case NV_OP_CVT:
+   case NV_OP_LG2:
+   case NV_OP_NEG:
+   case NV_OP_PREEX2:
+   case NV_OP_PRESIN:
+   case NV_OP_RCP:
+   case NV_OP_RSQ:
+      mods = NV_MOD_ABS | NV_MOD_NEG;
+      break;
+   case NV_OP_DFDX:
+   case NV_OP_DFDY:
+      assert(s == 0);
+      mods = NV_MOD_NEG;
+      break;
+   case NV_OP_ADD:
+   case NV_OP_MUL:
+   case NV_OP_MAD:
+      mods = NV_MOD_NEG;
+      break;
+   case NV_OP_MAX:
+   case NV_OP_MIN:
+      mods = NV_MOD_ABS;
+      break;
+   default:
+      break;
+   }
+   return mods;
+}
+
+/* Sums the refc counters of everything @nvi defines: its flags result (if
+ * any) plus its result values, up to the first empty slot of def[0..3].
+ */
+int
+nv_nvi_refcount(struct nv_instruction *nvi)
+{
+   int total = 0;
+   int d;
+
+   if (nvi->flags_def)
+      total = nvi->flags_def->refc;
+
+   for (d = 0; d < 4 && nvi->def[d]; ++d)
+      total += nvi->def[d]->refc;
+
+   return total;
+}
+
+/* Redirects every reference in pc->refs that points at @old_val to @new_val
+ * and returns how many references were redirected.  If both values are the
+ * same, nothing is changed and old_val->refc is returned instead.
+ */
+int
+nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+                   struct nv_value *new_val)
+{
+   int idx;
+   int replaced = 0;
+
+   if (old_val == new_val)
+      return old_val->refc;
+
+   for (idx = 0; idx < pc->num_refs; ++idx) {
+      if (pc->refs[idx]->value != old_val)
+         continue;
+      ++replaced;
+      nv_reference(pc, &pc->refs[idx], new_val);
+   }
+   return replaced;
+}
+
+/* Follows @ref through any chain of MOV instructions to the value it was
+ * originally copied from.  Returns that value if it is an immediate or the
+ * result of an LDA from one of the MEM_C(0)..MEM_C(15) files, else NULL.
+ * A NULL @ref yields NULL.
+ */
+struct nv_value *
+nvcg_find_constant(struct nv_ref *ref)
+{
+   struct nv_value *val;
+   struct nv_instruction *def;
+
+   if (!ref)
+      return NULL;
+
+   /* walk back through plain copies; they must not carry modifiers */
+   for (val = ref->value;
+        val->insn && val->insn->opcode == NV_OP_MOV;
+        val = val->insn->src[0]->value)
+      assert(!val->insn->src[0]->mod);
+
+   if (val->reg.file == NV_FILE_IMM)
+      return val;
+
+   def = val->insn;
+   if (def && def->opcode == NV_OP_LDA) {
+      struct nv_value *mem = def->src[0]->value;
+
+      if (mem->reg.file >= NV_FILE_MEM_C(0) &&
+          mem->reg.file <= NV_FILE_MEM_C(15))
+         return val;
+   }
+   return NULL;
+}
+
+/* Like nvcg_find_constant(), but only accepts values in the immediate file;
+ * returns NULL for anything else.
+ */
+struct nv_value *
+nvcg_find_immediate(struct nv_ref *ref)
+{
+   struct nv_value *cst = nvcg_find_constant(ref);
+
+   if (!cst || cst->reg.file != NV_FILE_IMM)
+      return NULL;
+   return cst;
+}
+
+/* Frees every 64th entry of pc->refs, starting with entry 0.
+ * NOTE(review): presumably references are allocated in chunks of 64 and
+ * these entries are the chunk base pointers - confirm against the allocator.
+ */
+static void
+nv_pc_free_refs(struct nv_pc *pc)
+{
+   int base = 0;
+
+   while (base < pc->num_refs) {
+      FREE(pc->refs[base]);
+      base += 64;
+   }
+}
+
+/* Maps a CFG_EDGE_* kind to a short label for debug output; unknown kinds
+ * give "?".
+ */
+static const char *
+edge_name(ubyte type)
+{
+   const char *label = "?";
+
+   switch (type) {
+   case CFG_EDGE_FORWARD:
+      label = "forward";
+      break;
+   case CFG_EDGE_BACK:
+      label = "back";
+      break;
+   case CFG_EDGE_LOOP_ENTER:
+      label = "loop";
+      break;
+   case CFG_EDGE_LOOP_LEAVE:
+      label = "break";
+      break;
+   case CFG_EDGE_FAKE:
+      label = "fake";
+      break;
+   default:
+      break;
+   }
+   return label;
+}
+
+/* Calls f(priv, block) on the basic blocks reachable from @root.
+ *
+ * Blocks are taken from a work stack.  For each popped block its outgoing
+ * edges are examined (edge 1 first, then edge 0):
+ *  - back edges are ignored;
+ *  - the target of a forward or fake edge is pushed once its per-block
+ *    counter (the block's own priv field) has reached its num_in;
+ *  - the target of a loop-enter edge is pushed immediately;
+ *  - the target of a loop-leave edge is parked on a second stack, which is
+ *    moved onto the work stack only when the work stack has run empty.
+ * The callback runs after the successors of the block have been queued.
+ * A block's priv counter is reset to 0 when the block is popped.
+ *
+ * Both stacks have a fixed capacity (64 and 16 entries); exceeding it is a
+ * programming error caught by assert().
+ */
+void
+nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
+{
+   struct nv_basic_block *bb[64], *bbb[16], *b;
+   int j, p, pp;
+
+   bb[0] = root;
+   p = 1;
+   pp = 0;
+
+   while (p > 0) {
+      b = bb[--p];
+      b->priv = 0;
+
+      for (j = 1; j >= 0; --j) {
+         if (!b->out[j])
+            continue;
+
+         switch (b->out_kind[j]) {
+         case CFG_EDGE_BACK:
+            continue;
+         case CFG_EDGE_FORWARD:
+         case CFG_EDGE_FAKE:
+            if (++b->out[j]->priv == b->out[j]->num_in) {
+               assert(p < (int)(sizeof(bb) / sizeof(bb[0])));
+               bb[p++] = b->out[j];
+            }
+            break;
+         case CFG_EDGE_LOOP_ENTER:
+            assert(p < (int)(sizeof(bb) / sizeof(bb[0])));
+            bb[p++] = b->out[j];
+            break;
+         case CFG_EDGE_LOOP_LEAVE:
+            assert(pp < (int)(sizeof(bbb) / sizeof(bbb[0])));
+            bbb[pp++] = b->out[j];
+            break;
+         default:
+            assert(0);
+            break;
+         }
+      }
+
+      f(priv, b);
+
+      /* work stack drained: continue with the parked loop exits
+       * (bbb is never larger than bb, so this copy cannot overflow) */
+      if (!p) {
+         p = pp;
+         for (; pp > 0; --pp)
+            bb[pp - 1] = bbb[pp - 1];
+      }
+   }
+}
+
+/* Pass callback: prints a header for block @b (its id and both outgoing
+ * edges with their kind and target id), then every instruction of the
+ * block, starting at the PHI list if there is one and at the entry
+ * instruction otherwise.  @priv is not used.
+ */
+static void
+nv_do_print_program(void *priv, struct nv_basic_block *b)
+{
+   struct nv_instruction *insn;
+   int e;
+
+   debug_printf("=== BB %i ", b->id);
+   for (e = 0; e < 2; ++e) {
+      if (b->out[e])
+         debug_printf("[%s -> %i] ", edge_name(b->out_kind[e]), b->out[e]->id);
+   }
+   debug_printf("===\n");
+
+   insn = b->phi ? b->phi : b->entry;
+   while (insn) {
+      nv_print_instruction(insn);
+      insn = insn->next;
+   }
+}
+
+/* Dumps all blocks reachable from @root through the in-order pass and
+ * finishes the listing with an "END" line.
+ */
+void
+nv_print_program(struct nv_basic_block *root)
+{
+   void *cookie = root; /* handed to the callback, which ignores it */
+
+   nv_pc_pass_in_order(root, nv_do_print_program, cookie);
+   debug_printf("END\n\n");
+}
+
+/* Prints the emitted program as a single line of 32-bit hexadecimal words
+ * (pc->bin_size is in bytes).
+ */
+static INLINE void
+nvcg_show_bincode(struct nv_pc *pc)
+{
+   int word = 0;
+
+   while (word < pc->bin_size / 4) {
+      debug_printf("0x%08x ", pc->emit[word]);
+      ++word;
+   }
+   debug_printf("\n");
+}
+
+/* Encodes every instruction of every block in pc->bb_list into the buffer
+ * that pc->emit points at, then fixes up the end of the program.
+ *
+ * pc->emit is used as a moving write cursor while emitting and is restored
+ * to the start of the buffer before returning.  pc->bin_size (bytes) must
+ * already match the encoded size; it grows by 8 if two extra words are
+ * appended, so the buffer needs room for two words beyond bin_size.
+ *
+ * Always returns 0.
+ */
+static int
+nv50_emit_program(struct nv_pc *pc)
+{
+   uint32_t *code = pc->emit;
+   int n;
+
+   NV50_DBGMSG("emitting program: size = %u\n", pc->bin_size);
+
+   for (n = 0; n < pc->num_blocks; ++n) {
+      struct nv_instruction *i;
+      struct nv_basic_block *b = pc->bb_list[n];
+
+      for (i = b->entry; i; i = i->next) {
+         nv50_emit_instruction(pc, i);
+
+         /* bit 0 of the first word tells whether the instruction just
+          * written occupies one or two 32-bit words */
+         pc->bin_pos += 1 + (pc->emit[0] & 1);
+         pc->emit += 1 + (pc->emit[0] & 1);
+      }
+   }
+   assert(pc->emit == &code[pc->bin_size / 4]);
+
+   /* Unless the last two words have bit 0 set and bit 1 clear in the first
+    * and bits 0-1 clear in the second, append a fixed two-word instruction
+    * (presumably a no-op that can carry the end marker - TODO confirm).
+    * NOTE(review): pc->emit[-2] reads before the buffer when fewer than two
+    * words were emitted - confirm that cannot happen.
+    */
+   /* XXX: we can do better than this ... */
+   if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
+      pc->emit[0] = 0xf0000001;
+      pc->emit[1] = 0xe0000000;
+      pc->bin_size += 8;
+   }
+
+   pc->emit = code;
+   /* set bit 0 of the very last word (presumably the program end marker) */
+   code[pc->bin_size / 4 - 1] |= 1;
+
+#ifdef NV50PC_DEBUG
+   nvcg_show_bincode(pc);
+#endif
+
+   return 0;
+}
+
+/* Runs the whole translation pipeline for the program described by @ti:
+ * TGSI to internal representation, optimization (pass0), register
+ * allocation (pass1), preparation for emission (pass2) and binary emission.
+ *
+ * On success 0 is returned and the code buffer, immediate buffer and fixup
+ * list are handed over to ti->p.  On failure these buffers are released
+ * here and a non-zero value is returned: 1 if the compiler context could
+ * not be allocated, 3 if the code buffer could not be allocated, otherwise
+ * the error code of the stage that failed.
+ */
+int
+nv50_generate_code(struct nv50_translation_info *ti)
+{
+   struct nv_pc *pc;
+   int ret;
+
+   pc = CALLOC_STRUCT(nv_pc);
+   if (!pc)
+      return 1;
+
+   ret = nv50_tgsi_to_nc(pc, ti);
+   if (ret)
+      goto out;
+#ifdef NV50PC_DEBUG
+   nv_print_program(pc->root);
+#endif
+
+   /* optimization */
+   ret = nv_pc_exec_pass0(pc);
+   if (ret)
+      goto out;
+#ifdef NV50PC_DEBUG
+   nv_print_program(pc->root);
+#endif
+
+   /* register allocation */
+   ret = nv_pc_exec_pass1(pc);
+   if (ret)
+      goto out;
+#ifdef NV50PC_DEBUG
+   nv_print_program(pc->root);
+#endif
+
+   /* prepare for emission */
+   ret = nv_pc_exec_pass2(pc);
+   if (ret)
+      goto out;
+
+   /* two spare words: the emitter may append one long instruction */
+   pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
+   if (!pc->emit) {
+      ret = 3;
+      goto out;
+   }
+   ret = nv50_emit_program(pc);
+   if (ret)
+      goto out;
+
+   ti->p->code_size = pc->bin_size;
+   ti->p->code = pc->emit;
+
+   ti->p->immd_size = pc->immd_count * 4;
+   ti->p->immd = pc->immd_buf;
+
+   /* highest 16 bit reg to num of 32 bit regs */
+   ti->p->max_gpr = (pc->max_reg[NV_FILE_GPR] >> 1) + 1;
+
+   ti->p->fixups = pc->fixups;
+   ti->p->num_fixups = pc->num_fixups;
+
+out:
+   /* reported here so that failures are logged as well */
+   NV50_DBGMSG("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+
+   nv_pc_free_refs(pc);
+   if (ret) {
+      /* Not handed over to ti->p, so release them here.  FREE() pairs with
+       * the CALLOC()/CALLOC_STRUCT() allocators used above; immd_buf and
+       * fixups are presumably allocated through the same wrappers.
+       */
+      if (pc->emit)
+         FREE(pc->emit);
+      if (pc->immd_buf)
+         FREE(pc->immd_buf);
+      if (pc->fixups)
+         FREE(pc->fixups);
+   }
+   FREE(pc);
+
+   return ret;
+}
+
+/* Links PHI instruction @i into the instruction list of block @b, keeping
+ * b->phi, b->entry and b->exit up to date.  It neither sets i->bb nor
+ * touches b->num_instructions; nvbb_insert_tail() does that after calling
+ * this function.
+ */
+static void
+nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
+{
+   if (!b->phi) {
+      /* no PHI yet: @i becomes the head of the list, in front of entry */
+      i->prev = NULL;
+      b->phi = i;
+      i->next = b->entry;
+      if (b->entry) {
+         assert(!b->entry->prev && b->exit);
+         b->entry->prev = i;
+      } else {
+         /* empty block: @i is recorded as entry and exit as well */
+         b->entry = i;
+         b->exit = i;
+      }
+   } else {
+      assert(b->entry);
+      if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
+         /* the block holds PHIs only; entry and exit move on to @i.
+          * i->next is not written here, so @i is presumably expected to
+          * arrive with next == NULL. */
+         assert(b->entry == b->exit);
+         b->entry->next = i;
+         i->prev = b->entry;
+         b->entry = i;
+         b->exit = i;
+      } else { /* insert before entry */
+         /* @i goes between the last PHI and the first regular instruction */
+         assert(b->entry->prev && b->exit);
+         i->next = b->entry;
+         i->prev = b->entry->prev;
+         b->entry->prev = i;
+         i->prev->next = i;
+      }
+   }
+}
+
+/* Adds @i to basic block @b, records @b as the owner of @i and bumps the
+ * block's instruction count.  PHI instructions are handed to
+ * nvbb_insert_phi(); everything else is appended after the current exit.
+ */
+void
+nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
+{
+   if (i->opcode != NV_OP_PHI) {
+      struct nv_instruction *last = b->exit;
+
+      i->prev = last;
+      if (last)
+         last->next = i;
+      b->exit = i;
+
+      /* the first instruction that follows the PHIs becomes the entry */
+      if (!b->entry || (last && last->opcode == NV_OP_PHI))
+         b->entry = i;
+   } else {
+      nvbb_insert_phi(b, i);
+   }
+
+   i->bb = b;
+   b->num_instructions++;
+}
+
+/* Inserts @ni into the instruction list directly behind @at.
+ *
+ * If @at is the last instruction, this is a plain append to at's block.
+ * In both cases @ni ends up owned by at's block and the block's instruction
+ * count is incremented.
+ */
+void
+nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
+{
+   if (!at->next) {
+      nvbb_insert_tail(at->bb, ni);
+      return;
+   }
+   ni->next = at->next;
+   ni->prev = at;
+   ni->next->prev = ni;
+   ni->prev->next = ni;
+
+   /* same bookkeeping as the append path, so that later list operations
+    * that go through ni->bb (e.g. deletion) find the owning block */
+   ni->bb = at->bb;
+   ni->bb->num_instructions++;
+}
+
+/* Unlinks @nvi from the instruction list of its basic block and fixes up
+ * the block's exit, entry and phi pointers where they referred to @nvi.
+ *
+ * All five source slots and the flags source are reset through
+ * nv_reference(NULL, slot, NULL) first (presumably releasing the references
+ * they hold - confirm).  The instruction itself is not freed here and the
+ * block's num_instructions is left unchanged.
+ */
+void
+nv_nvi_delete(struct nv_instruction *nvi)
+{
+   struct nv_basic_block *b = nvi->bb;
+   int j;
+
+   /* debug_printf("REM: "); nv_print_instruction(nvi); */
+
+   for (j = 0; j < 5; ++j)
+      nv_reference(NULL, &nvi->src[j], NULL);
+   nv_reference(NULL, &nvi->flags_src, NULL);
+
+   /* detach from the successor, or move the block's exit back by one */
+   if (nvi->next)
+      nvi->next->prev = nvi->prev;
+   else {
+      assert(nvi == b->exit);
+      b->exit = nvi->prev;
+   }
+
+   if (nvi->prev)
+      nvi->prev->next = nvi->next;
+
+   if (nvi == b->entry) {
+      /* PHIs don't get hooked to b->entry */
+      b->entry = nvi->next;
+      assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
+   }
+
+   if (nvi == b->phi) {
+      if (nvi->opcode != NV_OP_PHI)
+         NV50_DBGMSG("NOTE: b->phi points to non-PHI instruction\n");
+
+      /* the head PHI goes away: advance to the next PHI, if there is one */
+      assert(!nvi->prev);
+      if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
+         b->phi = NULL;
+      else
+         b->phi = nvi->next;
+   }
+}
+
+/* Swaps two adjacent non-PHI instructions: @i2, which must directly follow
+ * @i1, ends up in front of it.  The entry and exit pointers of i1's block
+ * are adjusted if they referred to either instruction.
+ */
+void
+nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2)
+{
+   struct nv_basic_block *blk = i1->bb;
+   struct nv_instruction *before;
+   struct nv_instruction *after;
+
+   assert(i1->opcode != NV_OP_PHI &&
+          i2->opcode != NV_OP_PHI);
+   assert(i1->next == i2);
+
+   if (blk->exit == i2)
+      blk->exit = i1;
+
+   if (blk->entry == i1)
+      blk->entry = i2;
+
+   before = i1->prev; /* neighbour in front of the pair */
+   after = i2->next;  /* neighbour behind the pair */
+
+   i2->prev = before;
+   i1->next = after;
+   i2->next = i1;
+   i1->prev = i2;
+
+   if (before)
+      before->next = i2;
+   if (after)
+      after->prev = i1;
+}
+
+/* Adds a control flow edge of kind @edge_kind from @parent to @b.
+ * The edge takes the first free outgoing slot of @parent (at most two) and
+ * the next incoming slot of @b (at most eight).
+ */
+void
+nvbb_attach_block(struct nv_basic_block *parent,
+                  struct nv_basic_block *b, ubyte edge_kind)
+{
+   int slot;
+
+   assert(b->num_in < 8);
+
+   if (!parent->out[0]) {
+      slot = 0;
+   } else {
+      assert(!parent->out[1]);
+      slot = 1;
+   }
+   parent->out[slot] = b;
+   parent->out_kind[slot] = edge_kind;
+
+   b->in[b->num_in] = parent;
+   b->in_kind[b->num_in] = edge_kind;
+   b->num_in++;
+}
+
+/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
+
+/* Recursive dominance test: TRUE if @b is @d itself, or if @b has incoming
+ * edges and every predecessor reached over a non-back edge is in turn
+ * dominated by @d.  A block without incoming edges (other than @d) gives
+ * FALSE.
+ */
+boolean
+nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
+{
+   int k;
+
+   if (b == d)
+      return TRUE;
+
+   for (k = 0; k < b->num_in; ++k) {
+      if (b->in_kind[k] == CFG_EDGE_BACK)
+         continue;
+      if (!nvbb_dominated_by(b->in[k], d))
+         return FALSE;
+   }
+
+   return (k != 0) ? TRUE : FALSE;
+}
+
+/* Checks whether block @bf (future) can be reached from block @bp (past) by
+ * following outgoing edges that are not wall edges.  The search does not
+ * continue beyond block @bt.
+ */
+boolean
+nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
+                  struct nv_basic_block *bt)
+{
+   int k;
+
+   if (bf == bp)
+      return TRUE;
+   if (bp == bt)
+      return FALSE;
+
+   for (k = 0; k < 2; ++k) {
+      if (!bp->out[k] || IS_WALL_EDGE(bp->out_kind[k]))
+         continue;
+      if (nvbb_reachable_by(bf, bp->out[k], bt))
+         return TRUE;
+   }
+   return FALSE;
+}
+
+/* Depth-first search, starting at @df, for a block on the dominance
+ * frontier of @b: a block that is not dominated by @b itself but has a
+ * predecessor (over a non-back edge) that is.  Back edges are not followed.
+ * Returns the first such block found, or NULL.
+ */
+static struct nv_basic_block *
+nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
+{
+   struct nv_basic_block *found;
+   int k;
+
+   if (!nvbb_dominated_by(df, b)) {
+      for (k = 0; k < df->num_in; ++k) {
+         if (df->in_kind[k] != CFG_EDGE_BACK &&
+             nvbb_dominated_by(df->in[k], b))
+            return df;
+      }
+   }
+
+   for (k = 0; k < 2 && df->out[k]; ++k) {
+      if (df->out_kind[k] != CFG_EDGE_BACK) {
+         found = nvbb_find_dom_frontier(b, df->out[k]);
+         if (found)
+            return found;
+      }
+   }
+   return NULL;
+}
+
+/* Returns the first dominance frontier block of @b found by searching from
+ * its successors in order, or NULL if there is none.
+ */
+struct nv_basic_block *
+nvbb_dom_frontier(struct nv_basic_block *b)
+{
+   int k = 0;
+
+   while (k < 2 && b->out[k]) {
+      struct nv_basic_block *df = nvbb_find_dom_frontier(b, b->out[k]);
+
+      if (df)
+         return df;
+      ++k;
+   }
+   return NULL;
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
new file mode 100644 (file)
index 0000000..703d32d
--- /dev/null
@@ -0,0 +1,498 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NV50_COMPILER_H__
+#define __NV50_COMPILER_H__
+
+/* Debug output is compiled in only when NV50PC_DEBUG is defined; otherwise
+ * NV50_DBGMSG() expands to nothing and its arguments are not evaluated.
+ */
+#ifdef NV50PC_DEBUG
+# define NV50_DBGMSG(args...) debug_printf(args)
+#else
+# define NV50_DBGMSG(args...)
+#endif
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+/* Opcode numbers. Code in this driver depends on the actual values:
+ *  - nv50_inst_min_size_tab[] is indexed by opcode and has NV_OP_COUNT entries
+ *  - nv_is_vector_op() treats NV_OP_TEX..NV_OP_TXQ as one contiguous range
+ *  - emit_flop() encodes (opcode - NV_OP_RCP) into the instruction word
+ * so renumbering requires updating those users as well.
+ */
+#define NV_OP_PHI       0
+#define NV_OP_EXTRACT   1
+#define NV_OP_COMBINE   2
+#define NV_OP_LDA       3
+#define NV_OP_STA       4
+#define NV_OP_MOV       5
+#define NV_OP_ADD       6
+#define NV_OP_SUB       7
+#define NV_OP_NEG       8
+#define NV_OP_MUL       9
+#define NV_OP_MAD       10
+#define NV_OP_CVT       11
+#define NV_OP_SAT       12
+#define NV_OP_NOT       13
+#define NV_OP_AND       14
+#define NV_OP_OR        15
+#define NV_OP_XOR       16
+#define NV_OP_SHL       17
+#define NV_OP_SHR       18
+#define NV_OP_RCP       19
+#define NV_OP_UNDEF     20
+#define NV_OP_RSQ       21
+#define NV_OP_LG2       22
+#define NV_OP_SIN       23
+#define NV_OP_COS       24
+#define NV_OP_EX2       25
+#define NV_OP_PRESIN    26
+#define NV_OP_PREEX2    27
+#define NV_OP_MIN       28
+#define NV_OP_MAX       29
+#define NV_OP_SET       30
+#define NV_OP_SAD       31
+#define NV_OP_KIL       32
+#define NV_OP_BRA       33
+#define NV_OP_CALL      34
+#define NV_OP_RET       35
+#define NV_OP_BREAK     36
+#define NV_OP_BREAKADDR 37
+#define NV_OP_JOINAT    38
+#define NV_OP_TEX       39
+#define NV_OP_TXB       40
+#define NV_OP_TXL       41
+#define NV_OP_TXF       42
+#define NV_OP_TXQ       43
+#define NV_OP_DFDX      44
+#define NV_OP_DFDY      45
+#define NV_OP_QUADOP    46
+#define NV_OP_LINTERP   47
+#define NV_OP_PINTERP   48
+#define NV_OP_ABS       49
+#define NV_OP_CEIL      50
+#define NV_OP_FLOOR     51
+#define NV_OP_TRUNC     52
+#define NV_OP_NOP       53
+#define NV_OP_SELECT    54
+#define NV_OP_EXPORT    55
+#define NV_OP_JOIN      56
+#define NV_OP_COUNT     57
+
+/* Register files. The indexed forms take a bank number; the emitter compares
+ * against NV_FILE_MEM_G(0)..NV_FILE_MEM_G(15) and
+ * NV_FILE_MEM_C(0)..NV_FILE_MEM_C(15).
+ */
+#define NV_FILE_GPR      0
+#define NV_FILE_OUT      1
+#define NV_FILE_ADDR     2
+#define NV_FILE_FLAGS    3
+#define NV_FILE_IMM      16
+#define NV_FILE_MEM_S    32
+#define NV_FILE_MEM_P    33
+#define NV_FILE_MEM_V    34
+#define NV_FILE_MEM_L    48
+/* argument is parenthesized so expressions such as (a | b) expand correctly */
+#define NV_FILE_MEM_G(i) (64 + (i))
+#define NV_FILE_MEM_C(i) (80 + (i))
+
+/* source modifier bits, stored in nv_ref::mod */
+#define NV_MOD_NEG 1
+#define NV_MOD_ABS 2
+#define NV_MOD_NOT 4
+#define NV_MOD_SAT 8
+
+/* Data types. The low nibble selects the base type (see nv_type_order());
+ * NV_TYPE_VEC() places a component count in the high nibble.
+ */
+#define NV_TYPE_U8  0x00
+#define NV_TYPE_S8  0x01
+#define NV_TYPE_U16 0x02
+#define NV_TYPE_S16 0x03
+#define NV_TYPE_U32 0x04
+#define NV_TYPE_S32 0x05
+#define NV_TYPE_P32 0x07
+#define NV_TYPE_F32 0x09
+#define NV_TYPE_F64 0x0b
+/* n is parenthesized so that arguments like (a + b) shift as a whole */
+#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4))
+#define NV_TYPE_LO  0x00
+#define NV_TYPE_HI  0x80
+#define NV_TYPE_ANY 0xff
+
+#define NV_TYPE_ISINT(t) ((t) <= 5)
+#define NV_TYPE_ISFLT(t) ((t) & 0x08)
+
+/* $cX registers contain 4 bits: OCSZ (Z is bit 0) */
+#define NV_CC_FL 0x0
+#define NV_CC_LT 0x1
+#define NV_CC_EQ 0x2
+#define NV_CC_LE 0x3
+#define NV_CC_GT 0x4
+#define NV_CC_NE 0x5
+#define NV_CC_GE 0x6
+#define NV_CC_U  0x8
+#define NV_CC_TR 0xf
+#define NV_CC_O  0x10
+#define NV_CC_C  0x11
+#define NV_CC_A  0x12
+#define NV_CC_S  0x13
+
+#define NV_PC_MAX_INSTRUCTIONS 2048
+#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
+
+/* TRUE for the texture opcodes, which occupy the contiguous opcode range
+ * NV_OP_TEX..NV_OP_TXQ.
+ */
+static INLINE boolean
+nv_is_vector_op(uint opcode)
+{
+   const boolean at_or_above_first = opcode >= NV_OP_TEX;
+   const boolean at_or_below_last = opcode <= NV_OP_TXQ;
+
+   return at_or_above_first && at_or_below_last;
+}
+
+/* Return log2 of the byte size of the base type encoded in the low nibble
+ * of 'type' (0 for 8 bit, 1 for 16 bit, 2 for 32 bit, 3 for 64 bit types).
+ * Unknown types trigger an assertion; when assertions are compiled out,
+ * 0 is returned so that the function never runs off its end.
+ */
+static INLINE uint
+nv_type_order(ubyte type)
+{
+   switch (type & 0xf) {
+   case NV_TYPE_U8:
+   case NV_TYPE_S8:
+      return 0;
+   case NV_TYPE_U16:
+   case NV_TYPE_S16:
+      return 1;
+   case NV_TYPE_U32:
+   case NV_TYPE_F32:
+   case NV_TYPE_S32:
+   case NV_TYPE_P32:
+      return 2;
+   case NV_TYPE_F64:
+      return 3;
+   default:
+      break;
+   }
+   assert(0);
+   return 0;
+}
+
+/* Size of a value of the given type: the base type size, multiplied by the
+ * count stored in the high nibble when that count is non-zero.
+ */
+static INLINE uint
+nv_type_sizeof(ubyte type)
+{
+   const int base = 1 << nv_type_order(type);
+   const int count = type >> 4;
+
+   return count ? base * count : base;
+}
+
+/* Size of the base type only; the high nibble of 'type' is ignored. */
+static INLINE uint
+nv_type_sizeof_base(ubyte type)
+{
+   const uint order = nv_type_order(type);
+
+   return 1 << order;
+}
+
+/* Describes where a value lives: register file, index and data type. */
+struct nv_reg {
+   int id; /* new_value() initializes this to -1 */
+   ubyte file; /* one of NV_FILE_* */
+   ubyte type; /* type of generating instruction's result */
+   union {
+      float f32;
+      double f64;
+      int32_t s32;
+      uint32_t u32;
+   } imm; /* immediate payload; get_immd_u32() reads u32 for NV_FILE_IMM */
+};
+
+/* Node of a singly linked list of (bgn, end) ranges; nv_value::livei points
+ * at such a list (presumably the value's live intervals - confirm in the
+ * register allocator).
+ */
+struct nv_range {
+   struct nv_range *next;
+   int bgn;
+   int end;
+};
+
+/* A value produced or consumed by instructions. */
+struct nv_value {
+   struct nv_reg reg; 
+   struct nv_instruction *insn;
+   struct nv_value *join; /* new_value() points this at the value itself;
+                           * the emitter reads the register through it */
+   int n; /* index of this value in nv_pc::values */
+   struct nv_range *livei;
+   int refc; /* incremented by new_ref(), adjusted by nv_reference() */
+
+   struct nv_value *next;
+   struct nv_value *prev;
+};
+
+/* A use of a value as an instruction source. */
+struct nv_ref {
+   struct nv_value *value;
+   struct nv_instruction *insn;
+   ubyte mod; /* NV_MOD_* bits */
+   ubyte typecast; /* new_ref() initializes this to the value's reg.type */
+   ubyte flags; /* not used yet */
+};
+
+struct nv_basic_block;
+
+/* One instruction of the intermediate representation. */
+struct nv_instruction {
+   struct nv_instruction *next;
+   struct nv_instruction *prev;
+   uint opcode; /* NV_OP_* */
+   int serial;
+   struct nv_value *def[4];
+   struct nv_value *flags_def; /* condition register written, if any */
+   struct nv_ref *src[5]; /* the emitter reads src[4] as the address register */
+   struct nv_ref *flags_src; /* condition register used for predication */
+   struct nv_basic_block *bb;
+   struct nv_basic_block *target; /* target block of control flow insn */
+   ubyte cc; /* NV_CC_*; nv_alloc_instruction() sets NV_CC_TR */
+   ubyte set_cond      : 4;
+   ubyte fixed         : 1; /* don't optimize away */
+   ubyte is_terminator : 1;
+   ubyte is_join       : 1;
+   ubyte is_long       : 1; /* for emission */
+   /* */
+   ubyte saturate : 1;
+   ubyte centroid : 1;
+   ubyte flat     : 1;
+   ubyte lanes    : 4;
+   ubyte tex_live : 1;
+   /* */
+   ubyte tex_t; /* TIC binding */
+   ubyte tex_s; /* TSC binding */
+   ubyte tex_argc : 3;
+   ubyte tex_cube : 1;
+   ubyte tex_mask : 4;
+   /* */
+   ubyte quadop;
+};
+
+/* Edge kinds; values other than FORWARD are distinct bits so that they can
+ * be tested with the masks below.
+ */
+#define CFG_EDGE_FORWARD     0
+#define CFG_EDGE_BACK        1
+#define CFG_EDGE_LOOP_ENTER  2
+#define CFG_EDGE_LOOP_LEAVE  4
+#define CFG_EDGE_FAKE        8
+
+/* 'WALL' edge means where reachability check doesn't follow */
+/* 'LOOP' edge means just having to do with loops */
+/* 7 = BACK | LOOP_ENTER | LOOP_LEAVE, 9 = BACK | FAKE */
+#define IS_LOOP_EDGE(k) ((k) & 7)
+#define IS_WALL_EDGE(k) ((k) & 9)
+
+/* Node of the control flow graph. */
+struct nv_basic_block {
+   struct nv_instruction *entry; /* first non-phi instruction */
+   struct nv_instruction *exit;
+   struct nv_instruction *phi; /* very first instruction */
+   int num_instructions;
+
+   struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
+   struct nv_basic_block *in[8]; /* hope that suffices */
+   uint num_in; /* number of used entries in in[] / in_kind[] */
+   ubyte out_kind[2]; /* CFG_EDGE_* of the corresponding out[] entry */
+   ubyte in_kind[8]; /* CFG_EDGE_* of the corresponding in[] entry */
+
+   int id; /* assigned from nv_pc::num_blocks by new_basic_block() */
+   struct nv_basic_block *last_visitor;
+   uint priv;
+   uint pass_seq;
+
+   uint32_t bin_pos; /* position, size in emitted code */
+   uint32_t bin_size;
+
+   /* presumably one bit per nv_value (NV_PC_MAX_VALUES bits) - confirm */
+   uint32_t live_set[NV_PC_MAX_VALUES / 32];
+};
+
+#define NV_FIXUP_CFLOW_RELOC 0
+#define NV_FIXUP_PARAM_RELOC 1
+
+/* Describes one bit field of the emitted code that is patched later. */
+struct nv_fixup {
+   ubyte type; /* NV_FIXUP_CFLOW_RELOC or NV_FIXUP_PARAM_RELOC */
+   int8_t shift; /* >= 0: shift left, < 0: shift right by -shift */
+   uint32_t mask; /* bits of the code word that belong to the field */
+   uint32_t data; /* addend, combined with the shifted value */
+   uint32_t offset; /* byte offset of the code word in the binary */
+};
+
+/* Patch the field described by 'fixup' in 'bin': the shifted 'data' plus the
+ * fixup's own addend replaces the bits selected by the mask, all other bits
+ * of the word are preserved.
+ *
+ * The shift amount is signed; previously it was stored in an unsigned byte,
+ * which made the right-shift case unreachable, and that case shifted by the
+ * (negative) amount itself instead of its magnitude.
+ */
+static INLINE void
+nv_fixup_apply(uint32_t *bin, struct nv_fixup *fixup, uint32_t data)
+{
+   uint32_t val;
+
+   val = bin[fixup->offset / 4] & ~fixup->mask;
+   if (fixup->shift < 0)
+      data >>= -fixup->shift;
+   else
+      data <<= fixup->shift;
+   val |= (fixup->data + data) & fixup->mask;
+   bin[fixup->offset / 4] = val;
+}
+
+/* State of one program compilation. */
+struct nv_pc {
+   struct nv50_translation_info *ti;
+
+   struct nv_basic_block *root;
+   struct nv_basic_block *current_block; /* new_instruction() appends here */
+   struct nv_basic_block *parent_block;
+
+   int loop_nesting_bound;
+   uint pass_seq;
+
+   /* fixed-size pools, handed out by new_value() / nv_alloc_instruction() */
+   struct nv_value values[NV_PC_MAX_VALUES];
+   struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
+   struct nv_ref **refs; /* grown 64 entries at a time by new_ref() */
+   struct nv_basic_block **bb_list;
+   int num_values;
+   int num_instructions;
+   int num_refs;
+   int num_blocks;
+
+   int max_reg[4];
+
+   uint32_t *immd_buf; /* populated on emit */
+   unsigned immd_count;
+
+   uint32_t *emit; /* the emitter writes emit[0] and emit[1] */
+   unsigned bin_size;
+   unsigned bin_pos;
+
+   struct nv_fixup *fixups;
+   int num_fixups;
+};
+
+void nvbb_insert_tail(struct nv_basic_block *, struct nv_instruction *);
+void nvi_insert_after(struct nv_instruction *, struct nv_instruction *);
+
+/* Take the next unused instruction from the pool in pc and initialize its
+ * opcode and condition code (NV_CC_TR). The instruction is not inserted
+ * into any block.
+ */
+static INLINE struct nv_instruction *
+nv_alloc_instruction(struct nv_pc *pc, uint opcode)
+{
+   struct nv_instruction *insn;
+   const int slot = pc->num_instructions;
+
+   pc->num_instructions = slot + 1;
+   assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
+   insn = &pc->instructions[slot];
+
+   insn->opcode = opcode;
+   insn->cc = NV_CC_TR;
+
+   return insn;
+}
+
+/* Allocate an instruction and append it to the end of pc->current_block. */
+static INLINE struct nv_instruction *
+new_instruction(struct nv_pc *pc, uint opcode)
+{
+   struct nv_instruction *insn;
+
+   insn = nv_alloc_instruction(pc, opcode);
+   nvbb_insert_tail(pc->current_block, insn);
+
+   return insn;
+}
+
+/* Allocate an instruction and insert it directly after 'at'. */
+static INLINE struct nv_instruction *
+new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
+{
+   struct nv_instruction *insn;
+
+   insn = nv_alloc_instruction(pc, opcode);
+   nvi_insert_after(at, insn);
+
+   return insn;
+}
+
+/* Take the next unused value from the pool in pc. The value starts out
+ * joined to itself, without an assigned register (reg.id == -1).
+ */
+static INLINE struct nv_value *
+new_value(struct nv_pc *pc, ubyte file, ubyte type)
+{
+   struct nv_value *val;
+
+   val = &pc->values[pc->num_values];
+   assert(pc->num_values < NV_PC_MAX_VALUES - 1);
+
+   val->n = pc->num_values;
+   pc->num_values++;
+
+   val->reg.file = file;
+   val->reg.type = type;
+   val->reg.id = -1;
+   val->join = val;
+
+   return val;
+}
+
+/* Allocate a value with the same register file and type as 'like'. */
+static INLINE struct nv_value *
+new_value_like(struct nv_pc *pc, struct nv_value *like)
+{
+   const ubyte file = like->reg.file;
+   const ubyte type = like->reg.type;
+
+   return new_value(pc, file, type);
+}
+
+/* Create a reference to 'val' and increment the value's reference count.
+ * References are allocated in chunks of 64; pc->refs holds a pointer to
+ * each of them. Allocation results are not checked.
+ */
+static INLINE struct nv_ref *
+new_ref(struct nv_pc *pc, struct nv_value *val)
+{
+   struct nv_ref *ref;
+
+   if (!(pc->num_refs % 64)) {
+      /* the pointer table is full (or empty): add room for 64 more */
+      const unsigned cur_bytes = pc->num_refs * sizeof(struct nv_ref *);
+      const unsigned add_bytes = 64 * sizeof(struct nv_ref *);
+      struct nv_ref *chunk;
+      int k;
+
+      pc->refs = REALLOC(pc->refs, cur_bytes, cur_bytes + add_bytes);
+
+      chunk = CALLOC(64, sizeof(struct nv_ref));
+      for (k = 0; k < 64; ++k)
+         pc->refs[pc->num_refs + k] = chunk + k;
+   }
+
+   ref = pc->refs[pc->num_refs];
+   pc->num_refs++;
+
+   ref->typecast = val->reg.type;
+   ref->value = val;
+   val->refc++;
+
+   return ref;
+}
+
+/* Allocate a zero-initialized basic block and give it the next block id.
+ * The allocation result is not checked.
+ */
+static INLINE struct nv_basic_block *
+new_basic_block(struct nv_pc *pc)
+{
+   struct nv_basic_block *block;
+
+   block = CALLOC_STRUCT(nv_basic_block);
+   block->id = pc->num_blocks;
+   pc->num_blocks++;
+
+   return block;
+}
+
+/* Make the reference slot *d refer to value s, keeping reference counts up
+ * to date. An existing nv_ref is reused; passing s == NULL clears the slot.
+ */
+static INLINE void
+nv_reference(struct nv_pc *pc, struct nv_ref **d, struct nv_value *s)
+{
+   struct nv_ref *cur = *d;
+
+   /* the previously referenced value loses one user */
+   if (cur)
+      --cur->value->refc;
+
+   if (!s) {
+      *d = NULL;
+      return;
+   }
+
+   if (cur) {
+      cur->value = s;
+      ++(s->refc);
+   } else {
+      *d = new_ref(pc, s);
+   }
+}
+
+/* nv50_pc_emit.c */
+void nv50_emit_instruction(struct nv_pc *, struct nv_instruction *);
+
+/* nv50_pc_print.c */
+const char *nv_opcode_name(uint opcode);
+void nv_print_instruction(struct nv_instruction *);
+
+/* nv50_pc.c */
+
+void nv_print_program(struct nv_basic_block *b);
+
+boolean nv_op_commutative(uint opcode);
+int nv50_indirect_opnd(struct nv_instruction *);
+boolean nv50_nvi_can_use_imm(struct nv_instruction *, int s);
+boolean nv50_nvi_can_predicate(struct nv_instruction *);
+boolean nv50_nvi_can_load(struct nv_instruction *, int s, struct nv_value *);
+ubyte nv50_supported_src_mods(uint opcode, int s);
+int nv_nvi_refcount(struct nv_instruction *);
+void nv_nvi_delete(struct nv_instruction *);
+void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
+void nvbb_attach_block(struct nv_basic_block *parent,
+                       struct nv_basic_block *, ubyte edge_kind);
+boolean nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *);
+boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *,
+                          struct nv_basic_block *);
+struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *);
+int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+                       struct nv_value *new_val);
+struct nv_value *nvcg_find_immediate(struct nv_ref *);
+struct nv_value *nvcg_find_constant(struct nv_ref *);
+
+typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
+
+void nv_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
+
+int nv_pc_exec_pass0(struct nv_pc *pc);
+int nv_pc_exec_pass1(struct nv_pc *pc);
+int nv_pc_exec_pass2(struct nv_pc *pc);
+
+int nv50_tgsi_to_nc(struct nv_pc *, struct nv50_translation_info *);
+
+#endif /* __NV50_COMPILER_H__ */
diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c
new file mode 100644 (file)
index 0000000..bb0a6f3
--- /dev/null
@@ -0,0 +1,1195 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nv50_pc.h"
+
+// Definitions
+
+#define FLAGS_CC_SHIFT    7
+#define FLAGS_ID_SHIFT    12
+#define FLAGS_WR_ID_SHIFT 4
+#define FLAGS_CC_MASK     (0x1f << FLAGS_CC_SHIFT)
+#define FLAGS_ID_MASK     (0x03 << FLAGS_ID_SHIFT)
+#define FLAGS_WR_EN       (1 << 6)
+#define FLAGS_WR_ID_MASK  (0x3 << FLAGS_WR_ID_SHIFT)
+
+/* Indexed by opcode (NV_OP_*). nv50_inst_min_size() returns 8 for any entry
+ * greater than 4 and otherwise the entry itself unless an operand forces 8.
+ * The trailing comments give the index of the last entry in each row.
+ */
+const ubyte nv50_inst_min_size_tab[NV_OP_COUNT] =
+{
+   0, 0, 0, 8, 8, 4, 4, 4, 8, 4, 4, 8, 8, 8, 8, 8, /* 15 */
+   8, 8, 8, 4, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 31 */
+   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 47 */
+   4, 8, 8, 8, 8, 8, 0, 0, 8
+};
+
+/* XXX: silence, you ! */
+unsigned
+nv50_inst_min_size(struct nv_instruction *i);
+
+/* Return the minimum encoding size of instruction i: 8 if the opcode or any
+ * operand/modifier needs the long form, else the opcode's table entry.
+ *
+ * The table contains 0 for some opcodes, so an instruction without a
+ * destination can get past the first test; def[0] is therefore checked for
+ * NULL before every dereference (previously only the first test did so).
+ */
+unsigned
+nv50_inst_min_size(struct nv_instruction *i)
+{
+   struct nv_value *def = i->def[0];
+   int n;
+
+   if (nv50_inst_min_size_tab[i->opcode] > 4)
+      return 8;
+
+   if (def) {
+      if (def->reg.file != NV_FILE_GPR)
+         return 8;
+      if (def->join->reg.id > 63)
+         return 8;
+   }
+
+   for (n = 0; n < 3; ++n) {
+      if (!i->src[n])
+         break;
+      if (i->src[n]->value->reg.file != NV_FILE_GPR &&
+          i->src[n]->value->reg.file != NV_FILE_MEM_V)
+         return 8;
+      if (i->src[n]->value->reg.id > 63)
+         return 8;
+   }
+
+   /* predicate read/write and address register need the second word */
+   if (i->flags_def || i->flags_src || i->src[4])
+      return 8;
+
+   if (i->is_join)
+      return 8;
+
+   if (i->src[2]) {
+      if (i->saturate || i->src[2]->mod)
+         return 8;
+      if (i->src[0]->mod ^ i->src[1]->mod)
+         return 8;
+      if ((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS)
+         return 8;
+      /* short 3-source form requires dst to be the same register as src2 */
+      if (!def ||
+          def->join->reg.id < 0 ||
+          def->join->reg.id != i->src[2]->value->join->reg.id)
+         return 8;
+   }
+
+   return nv50_inst_min_size_tab[i->opcode];
+}
+
+/* type under which source s of nvi is read (the reference's typecast) */
+static INLINE ubyte
+STYPE(struct nv_instruction *nvi, int s)
+{
+   const struct nv_ref *src = nvi->src[s];
+
+   return src->typecast;
+}
+
+/* register type of destination d of nvi */
+static INLINE ubyte
+DTYPE(struct nv_instruction *nvi, int d)
+{
+   const struct nv_value *dst = nvi->def[d];
+
+   return dst->reg.type;
+}
+
+/* register of the value that the referenced value is joined to */
+static INLINE struct nv_reg *
+SREG(struct nv_ref *ref)
+{
+   struct nv_value *joined = ref->value->join;
+
+   return &joined->reg;
+}
+
+/* register of the value that val is joined to */
+static INLINE struct nv_reg *
+DREG(struct nv_value *val)
+{
+   struct nv_value *joined = val->join;
+
+   return &joined->reg;
+}
+
+/* register file of source s of nvi (taken from the value itself, not from
+ * the value it is joined to)
+ */
+static INLINE ubyte
+SFILE(struct nv_instruction *nvi, int s)
+{
+   const struct nv_value *val = nvi->src[s]->value;
+
+   return val->reg.file;
+}
+
+/* Register file of destination d of nvi. The index used to be ignored in
+ * favour of def[0]; callers that pass 0 are unaffected by honouring it.
+ */
+static INLINE ubyte
+DFILE(struct nv_instruction *nvi, int d)
+{
+   return nvi->def[d]->reg.file;
+}
+
+/* OR the register id of source 'ref' into the code at bit position pos,
+ * counted across the 32-bit words of pc->emit.
+ */
+static INLINE void
+SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
+{
+   const int word = pos / 32;
+   const int bit = pos % 32;
+
+   pc->emit[word] |= SREG(ref)->id << bit;
+}
+
+/* OR the register id of destination 'val' into the code at bit position
+ * pos, counted across the 32-bit words of pc->emit.
+ */
+static INLINE void
+DID(struct nv_pc *pc, struct nv_value *val, int pos)
+{
+   const int word = pos / 32;
+   const int bit = pos % 32;
+
+   pc->emit[word] |= DREG(val)->id << bit;
+}
+
+/* raw 32 bits of the immediate referenced by ref (must be NV_FILE_IMM) */
+static INLINE uint32_t
+get_immd_u32(struct nv_ref *ref)
+{
+   const struct nv_reg *reg = &ref->value->reg;
+
+   assert(reg->file == NV_FILE_IMM);
+   return reg->imm.u32;
+}
+
+/* Encode a 32-bit immediate: the low 6 bits go to emit[0] bits 16..21, the
+ * remaining 26 bits to emit[1] starting at bit 2; the two low bits of
+ * emit[1] are set as well.
+ */
+static INLINE void
+set_immd_u32(struct nv_pc *pc, uint32_t u32)
+{
+   const uint32_t lo6 = u32 & 0x3f;
+   const uint32_t hi26 = u32 >> 6;
+
+   pc->emit[0] |= lo6 << 16;
+   pc->emit[1] |= (hi26 << 2) | 3;
+}
+
+/* encode the immediate referenced by ref (must be NV_FILE_IMM) */
+static INLINE void
+set_immd(struct nv_pc *pc, struct nv_ref *ref)
+{
+   uint32_t bits;
+
+   assert(ref->value->reg.file == NV_FILE_IMM);
+   bits = get_immd_u32(ref);
+   set_immd_u32(pc, bits);
+}
+
+/* Record a fixup for the field of width mask 'm' at bit position 's' of the
+ * instruction currently being emitted.
+ *
+ * NOTE: the function returns right after its declarations, so no fixup is
+ * ever recorded and everything below the return is unreachable.
+ * NOTE(review): this looks like a deliberate temporary disable - confirm
+ * before removing either the return or the dead code.
+ */
+static void
+new_fixup(struct nv_pc *pc, unsigned type, uint32_t data, uint32_t m, int s)
+{
+   const unsigned size = sizeof(struct nv_fixup);
+   const unsigned n = pc->num_fixups;
+   return;
+
+   /* grow the array in steps of 8 entries */
+   if (!(n % 8))
+      pc->fixups = REALLOC(pc->fixups, n * size, (n + 8) * size);
+
+   pc->fixups[n].offset = pc->bin_pos + (s / 32);
+   pc->fixups[n].type = type;
+   pc->fixups[n].data = data;
+   pc->fixups[n].mask = m << (s % 32);
+   pc->fixups[n].shift = s % 32;
+
+   ++pc->num_fixups;
+
+   /* the initial data must fit into the masked field */
+   assert(((data << (s % 32)) & pc->fixups[n].mask) == (data << (s % 32)));
+}
+
+/* Find the immediate referenced by ref in pc->immd_buf, appending it if it
+ * is not there yet, and store its index as the register id of the source.
+ * The buffer grows in steps of 8 entries.
+ */
+static void
+nv_pc_alloc_immd(struct nv_pc *pc, struct nv_ref *ref)
+{
+   const uint32_t val = get_immd_u32(ref);
+   uint32_t slot = 0;
+
+   while (slot < pc->immd_count && pc->immd_buf[slot] != val)
+      ++slot;
+
+   if (slot == pc->immd_count) {
+      /* not present yet: append, enlarging the buffer when it is full */
+      if ((pc->immd_count % 8) == 0) {
+         const unsigned cur_bytes = pc->immd_count * 4;
+
+         pc->immd_buf = REALLOC(pc->immd_buf, cur_bytes, cur_bytes + 8 * 4);
+      }
+      pc->immd_buf[pc->immd_count] = val;
+      pc->immd_count++;
+   }
+
+   SREG(ref)->id = slot;
+}
+
+/* Encode the condition code of i and, if present, the id of the flags
+ * register it is predicated on. The target bits must still be clear.
+ */
+static INLINE void
+set_pred(struct nv_pc *pc, struct nv_instruction *i)
+{
+   assert(!(pc->emit[1] & (FLAGS_CC_MASK | FLAGS_ID_MASK)));
+
+   pc->emit[1] |= i->cc << FLAGS_CC_SHIFT;
+   if (!i->flags_src)
+      return;
+   pc->emit[1] |= SREG(i->flags_src)->id << FLAGS_ID_SHIFT;
+}
+
+/* If i writes a flags register, encode its id together with the write
+ * enable bit. The target bits must still be clear.
+ */
+static INLINE void
+set_pred_wr(struct nv_pc *pc, struct nv_instruction *i)
+{
+   assert(!(pc->emit[1] & (FLAGS_WR_ID_MASK | FLAGS_WR_EN)));
+
+   if (!i->flags_def)
+      return;
+   pc->emit[1] |= (DREG(i->flags_def)->id << FLAGS_WR_ID_SHIFT) | FLAGS_WR_EN;
+}
+
+/* Encode address register 'id': the hardware number is id + 1, split into
+ * its two low bits (emit[0] bits 26..27) and bit 2 (emit[1] bit 2).
+ */
+static INLINE void
+set_a16_bits(struct nv_pc *pc, uint id)
+{
+   const uint hw_id = id + 1; /* $a0 is always 0 */
+
+   pc->emit[0] |= (hw_id & 3) << 26;
+   pc->emit[1] |= hw_id & 4;
+}
+
+/* encode the address register of i, which is held in source slot 4 */
+static INLINE void
+set_addr(struct nv_pc *pc, struct nv_instruction *i)
+{
+   struct nv_ref *addr = i->src[4];
+
+   if (!addr)
+      return;
+   set_a16_bits(pc, SREG(addr)->id);
+}
+
+/* Encode the destination register. A value without an assigned register is
+ * encoded as id 127 with the output bit set. Address registers are not
+ * valid here.
+ */
+static void
+set_dst(struct nv_pc *pc, struct nv_value *value)
+{
+   struct nv_reg *reg = DREG(value);
+
+   if (reg->id >= 0) {
+      if (reg->file == NV_FILE_OUT)
+         pc->emit[1] |= 0x8;
+      else
+         assert(reg->file != NV_FILE_ADDR);
+
+      pc->emit[0] |= reg->id << 2;
+      return;
+   }
+
+   pc->emit[0] |= (127 << 2) | 1; /* set 'long'-bit to catch bugs */
+   pc->emit[1] |= 0x8;
+}
+
+/* Encode source slot 0 (id at emit[0] bit 9). Accepts GPR, MEM_S and MEM_P;
+ * any other file is reported but still encoded.
+ */
+static void
+set_src_0(struct nv_pc *pc, struct nv_ref *ref)
+{
+   struct nv_reg *reg = SREG(ref);
+
+   switch (reg->file) {
+   case NV_FILE_GPR:
+      break;
+   case NV_FILE_MEM_S:
+      pc->emit[1] |= 0x00200000;
+      break;
+   case NV_FILE_MEM_P:
+      pc->emit[0] |= 0x01800000;
+      break;
+   default:
+      NOUVEAU_ERR("invalid src0 register file: %d\n", reg->file);
+      break;
+   }
+
+   assert(reg->id < 128);
+   pc->emit[0] |= reg->id << 9;
+}
+
+/* Encode source slot 1 (id at emit[0] bit 16). Accepts GPR and the constant
+ * buffers MEM_C(0..15), whose index goes to emit[1] bit 22; any other file
+ * is reported but still encoded.
+ */
+static void
+set_src_1(struct nv_pc *pc, struct nv_ref *ref)
+{
+   struct nv_reg *reg = SREG(ref);
+   const boolean in_cbuf = reg->file >= NV_FILE_MEM_C(0) &&
+                           reg->file <= NV_FILE_MEM_C(15);
+
+   if (in_cbuf) {
+      /* only one source may select a constant buffer */
+      assert(!(pc->emit[1] & 0x01800000));
+
+      pc->emit[0] |= 0x00800000;
+      pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22;
+   } else if (reg->file != NV_FILE_GPR) {
+      NOUVEAU_ERR("invalid src1 register file: %d\n", reg->file);
+   }
+
+   assert(reg->id < 128);
+   pc->emit[0] |= reg->id << 16;
+}
+
+/* Encode source slot 2 (id at emit[1] bit 14). Accepts GPR and the constant
+ * buffers MEM_C(0..15), whose index goes to emit[1] bit 22; any other file
+ * is reported but still encoded.
+ */
+static void
+set_src_2(struct nv_pc *pc, struct nv_ref *ref)
+{
+   struct nv_reg *reg = SREG(ref);
+   const boolean in_cbuf = reg->file >= NV_FILE_MEM_C(0) &&
+                           reg->file <= NV_FILE_MEM_C(15);
+
+   if (in_cbuf) {
+      /* only one source may select a constant buffer */
+      assert(!(pc->emit[1] & 0x01800000));
+
+      pc->emit[0] |= 0x01000000;
+      pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22;
+   } else if (reg->file != NV_FILE_GPR) {
+      NOUVEAU_ERR("invalid src2 register file: %d\n", reg->file);
+   }
+
+   assert(reg->id < 128);
+   pc->emit[1] |= reg->id << 14;
+}
+
+/* the default form:
+ * - long instruction
+ * - 1 to 3 sources in slots 0, 1, 2
+ * - address & flags
+ * An instruction without destination gets the same dst bits that set_dst()
+ * uses for an unassigned register.
+ */
+static void
+emit_form_MAD(struct nv_pc *pc, struct nv_instruction *i)
+{
+   struct nv_value *dst = i->def[0];
+
+   pc->emit[0] |= 1;
+
+   set_pred(pc, i);
+   set_pred_wr(pc, i);
+
+   if (!dst) {
+      pc->emit[0] |= 0x01fc;
+      pc->emit[1] |= 0x0008;
+   } else {
+      set_dst(pc, dst);
+   }
+
+   if (i->src[0])
+      set_src_0(pc, i->src[0]);
+   if (i->src[1])
+      set_src_1(pc, i->src[1]);
+   if (i->src[2])
+      set_src_2(pc, i->src[2]);
+
+   set_addr(pc, i);
+}
+
+/* like default form, but 2nd source in slot 2, no 3rd source */
+static void
+emit_form_ADD(struct nv_pc *pc, struct nv_instruction *i)
+{
+   struct nv_value *dst = i->def[0];
+
+   pc->emit[0] |= 1;
+
+   if (!dst) {
+      pc->emit[0] |= 0x01fc;
+      pc->emit[1] |= 0x0008;
+   } else {
+      set_dst(pc, dst);
+   }
+
+   set_pred(pc, i);
+   set_pred_wr(pc, i);
+
+   if (i->src[0])
+      set_src_0(pc, i->src[0]);
+   /* the second operand is encoded in the third source slot */
+   if (i->src[1])
+      set_src_2(pc, i->src[1]);
+
+   set_addr(pc, i);
+}
+
+/* short mul: destination plus up to two sources in slots 0 and 1; only
+ * valid for instructions not marked long
+ */
+static void
+emit_form_MUL(struct nv_pc *pc, struct nv_instruction *i)
+{
+   struct nv_ref *src0 = i->src[0];
+   struct nv_ref *src1 = i->src[1];
+
+   assert(!i->is_long && !(pc->emit[0] & 1));
+
+   assert(i->def[0]);
+   set_dst(pc, i->def[0]);
+
+   if (src0)
+      set_src_0(pc, src0);
+   if (src1)
+      set_src_1(pc, src1);
+}
+
+/* default immediate form
+ * - 1 to 3 sources where last is immediate
+ * - no address or predicate possible
+ * With three sources the two register operands are swapped: src[1] goes to
+ * slot 0 and src[0] to slot 1. mod_mask must be 0.
+ */
+static void
+emit_form_IMM(struct nv_pc *pc, struct nv_instruction *i, ubyte mod_mask)
+{
+   int num_src;
+
+   pc->emit[0] |= 1;
+
+   assert(i->def[0]);
+   assert(i->src[0]);
+   set_dst(pc, i->def[0]);
+
+   assert(!i->src[4] && !i->flags_src && !i->flags_def);
+
+   num_src = i->src[2] ? 3 : (i->src[1] ? 2 : 1);
+
+   switch (num_src) {
+   case 3:
+      set_immd(pc, i->src[2]);
+      set_src_0(pc, i->src[1]);
+      set_src_1(pc, i->src[0]);
+      break;
+   case 2:
+      set_immd(pc, i->src[1]);
+      set_src_0(pc, i->src[0]);
+      break;
+   default:
+      set_immd(pc, i->src[0]);
+      break;
+   }
+
+   assert(!mod_mask);
+}
+
+/* Encode the access size/signedness of a load/store for the given type in
+ * emit[1]; types not listed here set no bits.
+ */
+static void
+set_ld_st_size(struct nv_pc *pc, ubyte type)
+{
+   uint32_t size_bits = 0;
+
+   switch (type) {
+   case NV_TYPE_F64:
+      size_bits = 0x8000;
+      break;
+   case NV_TYPE_F32:
+   case NV_TYPE_S32:
+   case NV_TYPE_U32:
+      size_bits = 0xc000;
+      break;
+   case NV_TYPE_S16:
+      size_bits = 0x6000;
+      break;
+   case NV_TYPE_U16:
+      size_bits = 0x4000;
+      break;
+   case NV_TYPE_S8:
+      size_bits = 0x2000;
+      break;
+   default:
+      break;
+   }
+
+   pc->emit[1] |= size_bits;
+}
+
+/* Emit a load of src[0] into def[0]; the opcode words depend on the source
+ * register file. Aborts on a source file it cannot load from.
+ */
+static void
+emit_ld(struct nv_pc *pc, struct nv_instruction *i)
+{
+   ubyte sf = SFILE(i, 0);
+
+   /* immediates are placed in pc->immd_buf and loaded like MEM_C(0) data */
+   if (sf == NV_FILE_IMM) {
+      sf = NV_FILE_MEM_C(0);
+      nv_pc_alloc_immd(pc, i->src[0]);
+
+      new_fixup(pc, NV_FIXUP_PARAM_RELOC, SREG(i->src[0])->id, 0xffff, 9);
+   }
+
+   if (sf == NV_FILE_MEM_S ||
+       sf == NV_FILE_MEM_P) {
+      pc->emit[0] = 0x10000001;
+      pc->emit[1] = 0x04200000 | (0x3c << 12);
+      if (sf == NV_FILE_MEM_P)
+         pc->emit[0] |= 0x01800000;
+   } else
+   if (sf >= NV_FILE_MEM_C(0) &&
+       sf <= NV_FILE_MEM_C(15)) {
+      pc->emit[0] = 0x10000001;
+      pc->emit[1] = 0x24000000;
+      pc->emit[1] |= (sf - NV_FILE_MEM_C(0)) << 22;
+   } else
+   if (sf >= NV_FILE_MEM_G(0) &&
+       sf <= NV_FILE_MEM_G(15)) {
+      pc->emit[0] = 0xd0000001 | ((sf - NV_FILE_MEM_G(0)) << 16);
+      pc->emit[1] = 0xa0000000;
+
+      /* MEM_G is addressed through a GPR held in src[4] */
+      assert(i->src[4] && SREG(i->src[4])->file == NV_FILE_GPR);
+      SID(pc, i->src[4], 9);
+   } else
+   if (sf == NV_FILE_MEM_L) {
+      pc->emit[0] = 0xd0000001;
+      pc->emit[1] = 0x40000000;
+   } else {
+      NOUVEAU_ERR("invalid ld source file\n");
+      abort();
+   }
+
+   set_ld_st_size(pc, STYPE(i, 0));
+
+   set_dst(pc, i->def[0]);
+   set_pred_wr(pc, i);
+
+   set_pred(pc, i);
+
+   /* for all files except MEM_G: source id at bit 9, plus address register */
+   if (sf < NV_FILE_MEM_G(0) ||
+       sf > NV_FILE_MEM_G(15)) {
+      SID(pc, i->src[0], 9);
+      set_addr(pc, i);
+   }
+}
+
+/* Store emission is not implemented: this stub writes nothing to pc->emit. */
+static void
+emit_st(struct nv_pc *pc, struct nv_instruction *i)
+{
+
+}
+
+/* Check the source/destination file combination of a move.
+ * Returns 0 if it is acceptable, otherwise a non-zero code:
+ *  1: destination file is none of GPR, OUT, FLAGS, ADDR
+ *  2: FLAGS source with non-GPR destination
+ *  3: ADDR source with non-GPR destination
+ *  4: immediate source with a destination other than GPR or OUT
+ */
+static int
+verify_mov(struct nv_instruction *i)
+{
+   const ubyte sf = SFILE(i, 0);
+   const ubyte df = DFILE(i, 0);
+
+   switch (df) {
+   case NV_FILE_GPR:
+      return 0;
+   case NV_FILE_OUT:
+   case NV_FILE_FLAGS:
+   case NV_FILE_ADDR:
+      break;
+   default:
+      return 1;
+   }
+
+   switch (sf) {
+   case NV_FILE_FLAGS:
+      return 2;
+   case NV_FILE_ADDR:
+      return 3;
+   case NV_FILE_IMM:
+      return (df != NV_FILE_OUT) ? 4 : 0;
+   default:
+      return 0;
+   }
+}
+
+/* Emit a move. The encoding is selected by the source and destination
+ * register files, tested in the order below; memory sources are delegated
+ * to emit_ld().
+ */
+static void
+emit_mov(struct nv_pc *pc, struct nv_instruction *i)
+{
+   assert(!verify_mov(i));
+
+   /* all NV_FILE_MEM_* files have numbers >= NV_FILE_MEM_S */
+   if (SFILE(i, 0) >= NV_FILE_MEM_S)
+      emit_ld(pc, i);
+   else
+   if (SFILE(i, 0) == NV_FILE_FLAGS) {
+      pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2);
+      pc->emit[1] = 0x20000780 | (SREG(i->src[0])->id << 12);
+   } else
+   if (SFILE(i, 0) == NV_FILE_ADDR) {
+      pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2);
+      pc->emit[1] = 0x40000780;
+      set_a16_bits(pc, SREG(i->src[0])->id);
+   } else
+   if (DFILE(i, 0) == NV_FILE_FLAGS) {
+      pc->emit[0] = 0x00000001;
+      pc->emit[1] = 0xa0000000 | (1 << 6);
+      set_pred(pc, i);
+      pc->emit[0] |= SREG(i->src[0])->id << 9;
+      pc->emit[1] |= DREG(i->def[0])->id << 4;
+   } else
+   if (SFILE(i, 0) == NV_FILE_IMM) {
+      if (i->opcode == NV_OP_LDA) {
+         emit_ld(pc, i);
+      } else {
+         pc->emit[0] = 0x10008001;
+         pc->emit[1] = 0x00000003;
+
+         emit_form_IMM(pc, i, 0);
+      }
+   } else {
+      /* plain register to register move, short or long form */
+      pc->emit[0] = 0x10000000;
+      pc->emit[0] |= DREG(i->def[0])->id << 2;
+      pc->emit[0] |= SREG(i->src[0])->id << 9;
+
+      if (!i->is_long) {
+         pc->emit[0] |= 0x8000;
+      } else {
+         pc->emit[0] |= 0x00000001;
+         pc->emit[1] = 0x0403c000;
+
+         set_pred(pc, i);
+      }
+   }
+
+   /* NOTE(review): this also touches emit[1] for the short form - confirm */
+   if (DFILE(i, 0) == NV_FILE_OUT)
+      pc->emit[1] |= 0x8;
+}
+
+/* Emit an interpolation (NV_OP_LINTERP / NV_OP_PINTERP) of the MEM_V input
+ * src[0] into the GPR def[0]. For PINTERP, src[1] is encoded at bit 9.
+ *
+ * The mode bits are first set at their short-form positions in emit[0]
+ * (flat: bit 8, centroid: bit 24, PINTERP: bit 25). The long form carries
+ * them in emit[1] instead, so they are copied there and cleared in emit[0].
+ */
+static void
+emit_interp(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0x80000000;
+
+   assert(DFILE(i, 0) == NV_FILE_GPR);
+   assert(SFILE(i, 0) == NV_FILE_MEM_V);
+
+   DID(pc, i->def[0], 2);
+   SID(pc, i->src[0], 16);
+
+   if (i->flat)
+      pc->emit[0] |= 1 << 8;
+   else
+   if (i->opcode == NV_OP_PINTERP) {
+      pc->emit[0] |= 1 << 25;
+      pc->emit[0] |= SREG(i->src[1])->id << 9;
+   }
+
+   if (i->centroid)
+      pc->emit[0] |= 1 << 24;
+
+   /* predication is only available in the long form */
+   assert(i->is_long || !i->flags_src);
+
+   if (i->is_long) {
+      set_pred(pc, i);
+
+      /* bits 24..25 move down to 16..17; bit 8 moves UP to bit 18.
+       * The flat bit used to be shifted right, which always yielded 0
+       * and dropped the flag from long instructions.
+       */
+      pc->emit[1] |=
+             (pc->emit[0] & (3 << 24)) >> (24 - 16) |
+             (pc->emit[0] & (1 <<  8)) << (18 -  8);
+
+      pc->emit[0] |= 1;
+      pc->emit[0] &= ~0x03000100;
+   }
+}
+
+/* Emit NV_OP_MIN / NV_OP_MAX in the default long form. The destination type
+ * selects the F32, S32 or U32 variant; an ABS modifier on either source is
+ * encoded in emit[1].
+ */
+static void
+emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
+{
+   ubyte dtype;
+
+   pc->emit[0] = 0x30000000;
+   pc->emit[1] = 0;
+   if (i->opcode == NV_OP_MIN)
+      pc->emit[1] = 2 << 28;
+
+   dtype = DTYPE(i, 0);
+   if (dtype == NV_TYPE_F32) {
+      pc->emit[0] |= 0x80000000;
+      pc->emit[1] |= 0x80000000;
+   } else if (dtype == NV_TYPE_S32) {
+      pc->emit[1] |= 0x8c000000;
+   } else if (dtype == NV_TYPE_U32) {
+      pc->emit[1] |= 0x84000000;
+   }
+
+   emit_form_MAD(pc, i);
+
+   if (i->src[0]->mod & NV_MOD_ABS)
+      pc->emit[1] |= 0x00100000;
+   if (i->src[1]->mod & NV_MOD_ABS)
+      pc->emit[1] |= 0x00080000;
+}
+
+/* Emit a float add. An immediate second source selects the immediate form;
+ * otherwise the long (ADD) or short (MUL) form is used depending on is_long.
+ * NEG modifiers sit in emit[1] for the long form and in emit[0] for the
+ * other two.
+ */
+static void
+emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+   const boolean has_imm = SFILE(i, 1) == NV_FILE_IMM;
+
+   pc->emit[0] = 0xb0000000;
+
+   if (!has_imm && i->is_long) {
+      emit_form_ADD(pc, i);
+
+      if (i->src[0]->mod & NV_MOD_NEG)
+         pc->emit[1] |= 1 << 26;
+      if (i->src[1]->mod & NV_MOD_NEG)
+         pc->emit[1] |= 1 << 27;
+      return;
+   }
+
+   if (has_imm)
+      emit_form_IMM(pc, i, 0);
+   else
+      emit_form_MUL(pc, i);
+
+   if (i->src[0]->mod & NV_MOD_NEG)
+      pc->emit[0] |= 0x8000;
+   if (i->src[1]->mod & NV_MOD_NEG)
+      pc->emit[0] |= 1 << 22;
+}
+
+/* Emit a 32-bit integer add in immediate, long or short form. NEG modifiers
+ * are encoded in emit[0] for all three forms.
+ */
+static void
+emit_add_b32(struct nv_pc *pc, struct nv_instruction *i)
+{
+   const boolean has_imm = SFILE(i, 1) == NV_FILE_IMM;
+
+   if (!has_imm && i->is_long) {
+      pc->emit[0] = 0x20000000;
+      pc->emit[1] = 0x04000000;
+      emit_form_ADD(pc, i);
+   } else {
+      pc->emit[0] = 0x20008000;
+      if (has_imm)
+         emit_form_IMM(pc, i, 0);
+      else
+         emit_form_MUL(pc, i);
+   }
+
+   if (i->src[0]->mod & NV_MOD_NEG)
+      pc->emit[0] |= 1 << 28;
+   if (i->src[1]->mod & NV_MOD_NEG)
+      pc->emit[0] |= 1 << 22;
+}
+
+/* Emit an add that writes an address register: src[0] is an immediate,
+ * the optional src[1] an address register. The destination is encoded as
+ * id + 1.
+ */
+static void
+emit_add_a16(struct nv_pc *pc, struct nv_instruction *i)
+{
+   const uint32_t imm = get_immd_u32(i->src[0]);
+
+   pc->emit[0] = 0xd0000001 | (imm << 9);
+   pc->emit[1] = 0x20000000;
+
+   pc->emit[0] |= (DREG(i->def[0])->id + 1) << 2;
+
+   set_pred(pc, i);
+
+   if (!i->src[1])
+      return;
+   set_a16_bits(pc, SREG(i->src[1])->id);
+}
+
+/* Emit a control flow instruction with hardware opcode flow_op (placed in
+ * the top 4 bits of emit[0]). Unless the instruction is a BREAK, the target
+ * block's position, in 32-bit words, is encoded at bit 11.
+ */
+static void
+emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op)
+{
+   /* shift as unsigned: flow_op promotes to int, and values >= 8 would
+    * overflow into the sign bit
+    */
+   pc->emit[0] = 0x00000003 | ((uint32_t)flow_op << 28);
+   pc->emit[1] = 0x00000000;
+
+   set_pred(pc, i);
+
+   if (i->target && (i->opcode != NV_OP_BREAK)) {
+      new_fixup(pc, NV_FIXUP_CFLOW_RELOC, i->target->bin_pos, 0x7ff800, 11);
+      pc->emit[0] |= (i->target->bin_pos / 4) << 11;
+   }
+}
+
+/* Dispatch an add by destination: address register, F32, or U32/S32.
+ * Other destination types emit nothing.
+ */
+static INLINE void
+emit_add(struct nv_pc *pc, struct nv_instruction *i)
+{
+   ubyte dtype;
+
+   if (DFILE(i, 0) == NV_FILE_ADDR) {
+      emit_add_a16(pc, i);
+      return;
+   }
+
+   dtype = DTYPE(i, 0);
+   if (dtype == NV_TYPE_F32)
+      emit_add_f32(pc, i);
+   else if (dtype == NV_TYPE_U32 || dtype == NV_TYPE_S32)
+      emit_add_b32(pc, i);
+}
+
+/* Emit a two-operand bitwise operation. NV_OP_OR and NV_OP_XOR set an
+ * operation bit; any other opcode uses the base encoding. The operation
+ * bits live in emit[0] for the immediate form and in emit[1] for the long
+ * form.
+ */
+static void
+emit_bitop2(struct nv_pc *pc, struct nv_instruction *i)
+{
+   const boolean is_or = i->opcode == NV_OP_OR;
+   const boolean is_xor = i->opcode == NV_OP_XOR;
+
+   pc->emit[0] = 0xd0000000;
+
+   if (SFILE(i, 1) != NV_FILE_IMM) {
+      emit_form_MAD(pc, i);
+
+      pc->emit[1] |= 0x04000000;
+
+      if (is_or)
+         pc->emit[1] |= 0x4000;
+      else if (is_xor)
+         pc->emit[1] |= 0x8000;
+      return;
+   }
+
+   emit_form_IMM(pc, i, 0);
+
+   if (is_or)
+      pc->emit[0] |= 0x0100;
+   else if (is_xor)
+      pc->emit[0] |= 0x8000;
+}
+
+/* Emit a shift of GPR src[0] by the immediate src[1] (low 6 bits) whose
+ * result goes to an address register, encoded as id + 1.
+ */
+static void
+emit_arl(struct nv_pc *pc, struct nv_instruction *i)
+{
+   uint32_t shift_count;
+
+   assert(SFILE(i, 0) == NV_FILE_GPR);
+   assert(SFILE(i, 1) == NV_FILE_IMM);
+
+   assert(!i->flags_def);
+
+   pc->emit[0] = 0x00000001;
+   pc->emit[1] = 0xc0000000;
+
+   pc->emit[0] |= (i->def[0]->reg.id + 1) << 2;
+   set_pred(pc, i);
+   set_src_0(pc, i->src[0]);
+
+   shift_count = get_immd_u32(i->src[1]) & 0x3f;
+   pc->emit[0] |= shift_count << 16;
+}
+
+/* Emit NV_OP_SHL / NV_OP_SHR. Shifts into an address register are handled
+ * by emit_arl(). The shift count is either an immediate (low 7 bits, at
+ * emit[0] bit 16) or a register in the default long form.
+ *
+ * The immediate path used to encode only the count and the predicate, so
+ * destination and first source were left at 0; they are now encoded
+ * explicitly, as emit_arl() does for its immediate shift.
+ */
+static void
+emit_shift(struct nv_pc *pc, struct nv_instruction *i)
+{
+   if (DFILE(i, 0) == NV_FILE_ADDR) {
+      emit_arl(pc, i);
+      return;
+   }
+
+   pc->emit[0] = 0x30000001;
+   pc->emit[1] = 0xc4000000;
+
+   if (i->opcode == NV_OP_SHR)
+      pc->emit[1] |= 1 << 29;
+
+   if (SFILE(i, 1) == NV_FILE_IMM) {
+      pc->emit[1] |= 1 << 20;
+      pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x7f) << 16;
+
+      set_dst(pc, i->def[0]);
+      set_src_0(pc, i->src[0]);
+      set_pred(pc, i);
+   } else
+      emit_form_MAD(pc, i);
+
+   /* signed first operand */
+   if (STYPE(i, 0) == NV_TYPE_S32)
+      pc->emit[1] |= 1 << 27;
+}
+
+/* Emit one of the special float operations dispatched here by
+ * nv50_emit_instruction (RCP, RSQ, LG2, SIN, COS, EX2).
+ *
+ * The source must be an F32 value in a GPR. The short encoding is only
+ * valid for RCP without source modifiers (asserted). In the long encoding
+ * the operation is selected by (opcode - NV_OP_RCP) at bit 29 of word 1 and
+ * NEG / ABS source modifiers are encoded as separate bits.
+ */
+static void
+emit_flop(struct nv_pc *pc, struct nv_instruction *i)
+{
+   struct nv_ref *src0 = i->src[0];
+
+   pc->emit[0] = 0x90000000;
+
+   assert(STYPE(i, 0) == NV_TYPE_F32);
+   assert(SFILE(i, 0) == NV_FILE_GPR);
+
+   if (!i->is_long) {
+      emit_form_MUL(pc, i);
+      assert(i->opcode == NV_OP_RCP && !src0->mod);
+      return;
+   }
+
+   /* NOTE(review): signed shift - sub-op values above 3 overflow an int
+    * here; confirm the opcode enum layout / consider an unsigned cast.
+    */
+   pc->emit[1] = (i->opcode - NV_OP_RCP) << 29;
+
+   emit_form_MAD(pc, i);
+
+   if (src0->mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000;
+   if (src0->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000;
+}
+
+/* Emit a float multiply-add.
+ *
+ * The sign of the product is the XOR of the NEG modifiers of the first two
+ * sources; the addend has its own negate bit. The short encoding supports
+ * neither negation (asserted); saturation is only encoded in the long form.
+ */
+static void
+emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+   const boolean neg_mul = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG;
+   const boolean neg_add = (i->src[2]->mod & NV_MOD_NEG);
+
+   pc->emit[0] = 0xe0000000;
+
+   if (!i->is_long) {
+      emit_form_MUL(pc, i);
+      assert(!neg_mul && !neg_add);
+      return;
+   }
+
+   emit_form_MAD(pc, i);
+
+   if (neg_mul) pc->emit[1] |= 0x04000000;
+   if (neg_add) pc->emit[1] |= 0x08000000;
+
+   if (i->saturate)
+      pc->emit[1] |= 0x20000000;
+}
+
+/* MAD dispatcher; only the F32 variant exists, so every MAD is emitted as
+ * a float multiply-add.
+ */
+static INLINE void
+emit_mad(struct nv_pc *pc, struct nv_instruction *i)
+{
+   emit_mad_f32(pc, i);
+}
+
+/* Emit a float multiply in immediate, long or short form.
+ *
+ * The result is negated if exactly one of the two sources carries the NEG
+ * modifier. The negate bit is 0x8000 in word 0 for the immediate and short
+ * forms and bit 27 of word 1 for the long form.
+ */
+static void
+emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+   boolean neg = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG;
+
+   pc->emit[0] = 0xc0000000;
+
+   if (SFILE(i, 1) == NV_FILE_IMM) {
+      emit_form_IMM(pc, i, 0);
+
+      if (neg)
+         pc->emit[0] |= 0x8000;
+   } else
+   if (i->is_long) {
+      emit_form_MAD(pc, i);
+
+      if (neg)
+         pc->emit[1] |= 0x08 << 24;
+   } else {
+      emit_form_MUL(pc, i);
+
+      if (neg)
+         pc->emit[0] |= 0x8000;
+   }
+}
+
+/* Emit a SET (comparison) instruction; long encoding only (asserted).
+ *
+ * The condition code nvi->set_cond is stored at bit 14 of word 1. The type
+ * of the first source selects the unsigned, signed or float variant; any
+ * other source type trips the assert.
+ */
+static void
+emit_set(struct nv_pc *pc, struct nv_instruction *nvi)
+{
+   assert(nvi->is_long);
+
+   pc->emit[0] = 0x30000000;
+   pc->emit[1] = 0x60000000;
+
+   pc->emit[1] |= nvi->set_cond << 14;
+
+   switch (STYPE(nvi, 0)) {
+   case NV_TYPE_U32: pc->emit[1] |= 0x04000000; break;
+   case NV_TYPE_S32: pc->emit[1] |= 0x0c000000; break;
+   case NV_TYPE_F32: pc->emit[0] |= 0x80000000; break;
+   default:
+      assert(0);
+      break;
+   }
+
+   emit_form_MAD(pc, nvi);
+}
+
+/* Bit fields OR'ed into the second opcode word of a CVT instruction by
+ * emit_cvt.
+ *
+ * CVT_RN .. CVT_ABS:  rounding mode, saturate and absolute value flags.
+ * CVT_<dst>_<src>:    base encoding for a conversion from <src> to <dst>.
+ *
+ * The type selectors are shifted as unsigned values: 0x80 << 24 and above
+ * do not fit into a signed int.
+ */
+#define CVT_RN    (0x00 << 16)
+#define CVT_FLOOR (0x02 << 16)
+#define CVT_CEIL  (0x04 << 16)
+#define CVT_TRUNC (0x06 << 16)
+#define CVT_SAT   (0x08 << 16)
+#define CVT_ABS   (0x10 << 16)
+
+#define CVT_X32_X32 0x04004000
+#define CVT_X32_S32 0x04014000
+#define CVT_F32_F32 ((0xc0u << 24) | CVT_X32_X32)
+#define CVT_S32_F32 ((0x88u << 24) | CVT_X32_X32)
+#define CVT_U32_F32 ((0x80u << 24) | CVT_X32_X32)
+#define CVT_F32_S32 ((0x40u << 24) | CVT_X32_S32)
+#define CVT_F32_U32 ((0x40u << 24) | CVT_X32_X32)
+#define CVT_S32_S32 ((0x08u << 24) | CVT_X32_S32)
+#define CVT_S32_U32 ((0x08u << 24) | CVT_X32_X32)
+#define CVT_U32_S32 ((0x00u << 24) | CVT_X32_S32)
+#define CVT_U32_U32 ((0x00u << 24) | CVT_X32_X32)
+
+#define CVT_NEG 0x20000000
+#define CVT_RI  0x08000000
+
+/* Emit a conversion-class instruction: CVT itself and the operations that
+ * are expressed through it (ABS, NEG, SAT, CEIL, FLOOR, TRUNC).
+ *
+ * The destination type (or, without a destination value, the source type)
+ * together with the source type selects the CVT_<dst>_<src> base encoding.
+ * The opcode then adds rounding / abs / sat / neg flags, and the source
+ * modifiers are merged in last: a NEG modifier toggles the negate bit, so
+ * NEG applied to a negated source cancels out.
+ */
+static void
+emit_cvt(struct nv_pc *pc, struct nv_instruction *nvi)
+{
+   ubyte dst_type = nvi->def[0] ? DTYPE(nvi, 0) : STYPE(nvi, 0);
+
+   pc->emit[0] = 0xa0000000;
+   /* Start from a defined value: the switches below have no default, and
+    * word 1 is compared and OR'ed into afterwards.
+    */
+   pc->emit[1] = 0;
+
+   switch (dst_type) {
+   case NV_TYPE_F32:
+      switch (STYPE(nvi, 0)) {
+      case NV_TYPE_F32: pc->emit[1] = CVT_F32_F32; break;
+      case NV_TYPE_S32: pc->emit[1] = CVT_F32_S32; break;
+      case NV_TYPE_U32: pc->emit[1] = CVT_F32_U32; break;
+      }
+      break;
+   case NV_TYPE_S32:
+      switch (STYPE(nvi, 0)) {
+      case NV_TYPE_F32: pc->emit[1] = CVT_S32_F32; break;
+      case NV_TYPE_S32: pc->emit[1] = CVT_S32_S32; break;
+      case NV_TYPE_U32: pc->emit[1] = CVT_S32_U32; break;
+      }
+      break;
+   case NV_TYPE_U32:
+      switch (STYPE(nvi, 0)) {
+      case NV_TYPE_F32: pc->emit[1] = CVT_U32_F32; break;
+      case NV_TYPE_S32: pc->emit[1] = CVT_U32_S32; break;
+      case NV_TYPE_U32: pc->emit[1] = CVT_U32_U32; break;
+      }
+      break;
+   }
+   /* every supported combination yields a non-zero base encoding */
+   assert(pc->emit[1]);
+
+   /* float to float with a rounding opcode rounds to an integral value */
+   if (pc->emit[1] == CVT_F32_F32 &&
+       (nvi->opcode == NV_OP_CEIL || nvi->opcode == NV_OP_FLOOR ||
+       nvi->opcode == NV_OP_TRUNC))
+       pc->emit[1] |= CVT_RI;
+
+   switch (nvi->opcode) {
+   case NV_OP_CEIL:  pc->emit[1] |= CVT_CEIL; break;
+   case NV_OP_FLOOR: pc->emit[1] |= CVT_FLOOR; break;
+   case NV_OP_TRUNC: pc->emit[1] |= CVT_TRUNC; break;
+
+   case NV_OP_ABS: pc->emit[1] |= CVT_ABS; break;
+   case NV_OP_SAT: pc->emit[1] |= CVT_SAT; break;
+   case NV_OP_NEG: pc->emit[1] |= CVT_NEG; break;
+   default:
+      assert(nvi->opcode == NV_OP_CVT);
+      break;
+   }
+   /* ABS of a negated source cannot be expressed with these flags */
+   assert(nvi->opcode != NV_OP_ABS || !(nvi->src[0]->mod & NV_MOD_NEG));
+
+   if (nvi->src[0]->mod & NV_MOD_NEG) pc->emit[1] ^= CVT_NEG;
+   if (nvi->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= CVT_ABS;
+
+   emit_form_MAD(pc, nvi);
+}
+
+/* Emit a texture fetch (TEX, TXB or TXL).
+ *
+ * Encodes the first destination register, the predicate, the tex_t and
+ * tex_s indices, the argument count minus one, the 4 bit result mask (split
+ * over both words), the tex_live and tex_cube flags, and a selector bit in
+ * word 1 that distinguishes TXB and TXL from plain TEX.
+ */
+static void
+emit_tex(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0xf0000001;
+   pc->emit[1] = 0x00000000;
+
+   DID(pc, i->def[0], 2);
+
+   set_pred(pc, i);
+
+   pc->emit[0] |= i->tex_t << 9;
+   pc->emit[0] |= i->tex_s << 17;
+
+   pc->emit[0] |= (i->tex_argc - 1) << 22;
+
+   /* low two mask bits go to word 0, high two to word 1 */
+   pc->emit[0] |= (i->tex_mask & 0x3) << 25;
+   pc->emit[1] |= (i->tex_mask & 0xc) << 12;
+
+   if (i->tex_live)
+      pc->emit[1] |= 4;
+
+   if (i->tex_cube)
+      pc->emit[0] |= 0x08000000;
+
+   if (i->opcode == NV_OP_TXB)
+      pc->emit[1] |= 0x20000000;
+   else
+   if (i->opcode == NV_OP_TXL)
+      pc->emit[1] |= 0x40000000;
+}
+
+/* Emit PRESIN / PREEX2 (dispatched here by nv50_emit_instruction).
+ *
+ * PREEX2 is distinguished from PRESIN by bit 14 of word 1. NEG and ABS
+ * modifiers of the source are encoded as separate bits after the operands
+ * have been emitted.
+ */
+static void
+emit_cvt2fixed(struct nv_pc *pc, struct nv_instruction *i)
+{
+   ubyte mod = i->src[0]->mod;
+
+   pc->emit[0] = 0xb0000000;
+   pc->emit[1] = 0xc0000000;
+
+   if (i->opcode == NV_OP_PREEX2)
+      pc->emit[1] |= 0x4000;
+
+   emit_form_MAD(pc, i);
+
+   if (mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000;
+   if (mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000;
+}
+
+/* Emit DFDX (derivative in x).
+ *
+ * Long encoding with a GPR source only (asserted). A NEG modifier on the
+ * source selects a different pair of base opcode words. The single source
+ * is written to both source slots (bit 9 of word 0 and bit 14 of word 1).
+ */
+static void
+emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
+{
+   assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR);
+
+   pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0240001 : 0xc0140001;
+   pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 0x86400000 : 0x89800000;
+
+   DID(pc, i->def[0], 2);
+   SID(pc, i->src[0], 9);
+   SID(pc, i->src[0], 32 + 14);
+
+   set_pred(pc, i);
+   set_pred_wr(pc, i);
+}
+
+/* Emit DFDY (derivative in y).
+ *
+ * Same layout as emit_ddx with different base opcode words: long encoding
+ * with a GPR source only (asserted), a NEG modifier selects the alternative
+ * opcode pair, and the source is written to both source slots.
+ */
+static void
+emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
+{
+   assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR);
+
+   pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0250001 : 0xc0150001;
+   pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 0x85800000 : 0x8a400000;
+
+   DID(pc, i->def[0], 2);
+   SID(pc, i->src[0], 9);
+   SID(pc, i->src[0], 32 + 14);
+
+   set_pred(pc, i);
+   set_pred_wr(pc, i);
+}
+
+/* Emit a QUADOP instruction.
+ *
+ * The lane mask i->lanes is stored at bit 16 of word 0. The 8 bit
+ * i->quadop value is split: its low 2 bits go to bit 20 of word 0, the
+ * remaining 6 bits to bits 22..27 of word 1.
+ */
+static void
+emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
+{
+   pc->emit[0] = 0xc0000000;
+   pc->emit[1] = 0x80000000;
+
+   emit_form_ADD(pc, i);
+
+   pc->emit[0] |= i->lanes << 16;
+
+   pc->emit[0] |= (i->quadop & 0x03) << 20;
+   pc->emit[1] |= (i->quadop & 0xfc) << 20;
+}
+
+/* Encode instruction @i into pc->emit[0] (and pc->emit[1] for long
+ * instructions) by dispatching on the opcode to the matching emit_* helper.
+ *
+ * PHI, UNDEF and SUB are expected to be gone by now and only produce an
+ * error message; an opcode without a case aborts. After the dispatch the
+ * join flag is merged in and the long/short bit is checked against
+ * i->is_long.
+ */
+void
+nv50_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
+{
+   /* nv_print_instruction(i); */
+
+   switch (i->opcode) {
+   case NV_OP_MOV:
+      /* moves into an address register use the address add encoding */
+      if (DFILE(i, 0) == NV_FILE_ADDR)
+         emit_add_a16(pc, i);
+      else
+         emit_mov(pc, i);
+      break;
+   case NV_OP_LDA:
+      emit_mov(pc, i);
+      break;
+   case NV_OP_STA:
+      emit_st(pc, i);
+      break;
+   case NV_OP_LINTERP:
+   case NV_OP_PINTERP:
+      emit_interp(pc, i);
+      break;
+   case NV_OP_ADD:
+      emit_add(pc, i);
+      break;
+   case NV_OP_AND:
+   case NV_OP_OR:
+   case NV_OP_XOR:
+      emit_bitop2(pc, i);
+      break;
+   case NV_OP_CVT:
+   case NV_OP_ABS:
+   case NV_OP_NEG:
+   case NV_OP_SAT:
+   case NV_OP_CEIL:
+   case NV_OP_FLOOR:
+   case NV_OP_TRUNC:
+      emit_cvt(pc, i);
+      break;
+   case NV_OP_DFDX:
+      emit_ddx(pc, i);
+      break;
+   case NV_OP_DFDY:
+      emit_ddy(pc, i);
+      break;
+   case NV_OP_RCP:
+   case NV_OP_RSQ:
+   case NV_OP_LG2:
+   case NV_OP_SIN:
+   case NV_OP_COS:
+   case NV_OP_EX2:
+      emit_flop(pc, i);
+      break;
+   case NV_OP_PRESIN:
+   case NV_OP_PREEX2:
+      emit_cvt2fixed(pc, i);
+      break;
+   case NV_OP_MAD:
+      emit_mad(pc, i);
+      break;
+   case NV_OP_MAX:
+   case NV_OP_MIN:
+      emit_minmax(pc, i);
+      break;
+   case NV_OP_MUL:
+      emit_mul_f32(pc, i);
+      break;
+   case NV_OP_SET:
+      emit_set(pc, i);
+      break;
+   case NV_OP_SHL:
+   case NV_OP_SHR:
+      emit_shift(pc, i);
+      break;
+   case NV_OP_TEX:
+   case NV_OP_TXB:
+   case NV_OP_TXL:
+      emit_tex(pc, i);
+      break;
+   case NV_OP_QUADOP:
+      emit_quadop(pc, i);
+      break;
+   /* flow control: the last argument is the sub-opcode for emit_flow */
+   case NV_OP_KIL:
+      emit_flow(pc, i, 0x0);
+      break;
+   case NV_OP_BRA:
+      emit_flow(pc, i, 0x1);
+      break;
+   case NV_OP_CALL:
+      emit_flow(pc, i, 0x2);
+      break;
+   case NV_OP_RET:
+      emit_flow(pc, i, 0x3);
+      break;
+   case NV_OP_BREAKADDR:
+      emit_flow(pc, i, 0x4);
+      break;
+   case NV_OP_BREAK:
+      emit_flow(pc, i, 0x5);
+      break;
+   case NV_OP_JOINAT:
+      emit_flow(pc, i, 0xa);
+      break;
+   case NV_OP_NOP:
+   case NV_OP_JOIN:
+      /* fixed long encoding; a JOIN gets its join bit added below when
+       * i->is_join is set
+       */
+      pc->emit[0] = 0xf0000001;
+      pc->emit[1] = 0xe0000000;
+      break;
+   case NV_OP_PHI:
+   case NV_OP_UNDEF:
+   case NV_OP_SUB:
+      /* only reported: pc->emit is left untouched and the checks below
+       * still run on whatever it contains
+       */
+      NOUVEAU_ERR("operation \"%s\" should have been eliminated\n",
+                  nv_opcode_name(i->opcode));
+      break;
+   default:
+      NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
+      abort();
+      break;
+   }
+
+   /* the join flag only exists in the long encoding; bit 0 of word 1 must
+    * be clear for it to be set
+    */
+   if (i->is_join) {
+      assert(i->is_long && !(pc->emit[1] & 1));
+      pc->emit[1] |= 2;
+   }
+
+   /* bit 0 of word 0 marks a long instruction */
+   assert((pc->emit[0] & 1) == i->is_long);
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
new file mode 100644 (file)
index 0000000..1ed5032
--- /dev/null
@@ -0,0 +1,1096 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* #define NV50PC_DEBUG */
+
+#include "nv50_pc.h"
+
+/* Continue a pass in the successors of the current block.
+ *
+ * Expects the variables 'b' (current basic block) and 'ctx' (pass context
+ * with a 'pc' member) to be in scope. Marks b as visited for the current
+ * pass sequence number, then calls f(ctx, successor) for each of the (up to
+ * two) successors that has not been visited in this pass yet.
+ * j is an int lvalue used as loop counter.
+ */
+#define DESCEND_ARBITRARY(j, f)                                 \
+do {                                                            \
+   b->pass_seq = ctx->pc->pass_seq;                             \
+                                                                \
+   for (j = 0; j < 2; ++j)                                      \
+      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
+         f(ctx, b->out[j]);                                      \
+} while (0)
+
+extern unsigned nv50_inst_min_size(struct nv_instruction *);
+
+/* Minimal pass context: just the program being processed. */
+struct nv_pc_pass {
+   struct nv_pc *pc;
+};
+
+/* Two values are considered equal if they are in the same register file
+ * and their representatives (join) were assigned the same register id.
+ */
+static INLINE boolean
+values_equal(struct nv_value *a, struct nv_value *b)
+{
+   /* XXX: sizes */
+   if (a->reg.file != b->reg.file)
+      return FALSE;
+
+   return a->join->reg.id == b->join->reg.id;
+}
+
+/* One direction of the reordering check: returns FALSE if @b reads any
+ * register written by @a, or if @b's flags source is the flags value
+ * defined by @a; TRUE otherwise.
+ */
+static INLINE boolean
+inst_commutation_check(struct nv_instruction *a,
+                       struct nv_instruction *b)
+{
+   int d, s;
+
+   /* definitions are packed: stop at the first empty slot */
+   for (d = 0; d < 4 && a->def[d]; ++d) {
+      /* sources may have gaps, so every slot is inspected */
+      for (s = 0; s < 5; ++s) {
+         if (b->src[s] && values_equal(a->def[d], b->src[s]->value))
+            return FALSE;
+      }
+   }
+
+   if (b->flags_src) {
+      if (b->flags_src->value == a->flags_def)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/* Decide whether two instructions may exchange their positions. The order
+ * of the arguments is irrelevant: the dependency check is run in both
+ * directions and both have to pass.
+ */
+static boolean
+inst_commutation_legal(struct nv_instruction *a,
+                      struct nv_instruction *b)
+{
+   if (!inst_commutation_check(a, b))
+      return FALSE;
+
+   return inst_commutation_check(b, a);
+}
+
+/* An instruction may be removed as dead code if it is neither a terminator
+ * nor a join, has no branch target, is not marked fixed, and
+ * nv_nvi_refcount reports no references to it.
+ */
+static INLINE boolean
+inst_cullable(struct nv_instruction *nvi)
+{
+   if (nvi->is_terminator || nvi->is_join)
+      return FALSE;
+   if (nvi->target)
+      return FALSE;
+   if (nvi->fixed)
+      return FALSE;
+
+   return nv_nvi_refcount(nvi) ? FALSE : TRUE;
+}
+
+/* Returns TRUE if @nvi does not need to be emitted:
+ *  - EXPORT and UNDEF pseudo instructions,
+ *  - instructions whose first result was not assigned a register
+ *    (join->reg.id < 0),
+ *  - MOV / SELECT whose source(s) already are the destination register.
+ * Instructions that define flags, terminate a block or are joins are never
+ * treated as no-ops (unless they are EXPORT/UNDEF).
+ */
+static INLINE boolean
+nvi_isnop(struct nv_instruction *nvi)
+{
+   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
+      return TRUE;
+
+   /* NOTE: 'fixed' now only means that it shouldn't be optimized away,
+    *  but we can still remove it if it is a no-op move.
+    */
+   if (/* nvi->fixed || */
+       /* nvi->flags_src || */ /* cond. MOV to same register is still NOP */
+       nvi->flags_def ||
+       nvi->is_terminator ||
+       nvi->is_join)
+      return FALSE;
+
+   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
+      return TRUE;
+
+   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
+      return FALSE;
+
+   /* NOTE(review): def[0] is dereferenced without the NULL check used
+    * above - presumably MOV / SELECT always have a destination.
+    */
+   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
+      return FALSE;
+
+   if (nvi->src[0]->value->join->reg.id < 0) {
+      NV50_DBGMSG("nvi_isnop: orphaned value detected\n");
+      return TRUE;
+   }
+
+   /* a SELECT is only a no-op if both sources match the destination */
+   if (nvi->opcode == NV_OP_SELECT)
+      if (!values_equal(nvi->def[0], nvi->src[1]->value))
+         return FALSE;
+
+   return values_equal(nvi->def[0], nvi->src[0]->value);
+}
+
+/* Generic context handed to the optimization passes in this file. */
+struct nv_pass {
+   struct nv_pc *pc; /* program being processed */
+   int n;            /* pass specific counter - not used by the passes visible here */
+   void *priv;       /* pass specific data - not used by the passes visible here */
+};
+
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
+
+/* Per-block callback run right before emission (see nv_pc_exec_pass2).
+ *
+ * @priv: the struct nv_pc being compiled
+ * @b:    block to prepare
+ *
+ * Appends @b to pc->bb_list, removes a branch at the end of the previous
+ * non-empty block if it only jumps to @b, deletes no-op instructions,
+ * decides for each instruction whether the short or the long encoding is
+ * used and accumulates the block and program sizes.
+ *
+ * While iterating, b->bin_size is counted in 32 bit words; it is converted
+ * to bytes in the last statement.
+ */
+static void
+nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
+{
+   struct nv_pc *pc = (struct nv_pc *)priv;
+   struct nv_basic_block *in;
+   struct nv_instruction *nvi, *next;
+   int j;
+   uint size, n32 = 0;
+
+   /* find the last block in the list that actually contains code */
+   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
+   if (j >= 0) {
+      in = pc->bb_list[j];
+
+      /* check for no-op branches (BRA $PC+8) */
+      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
+         in->bin_size -= 8;
+         pc->bin_size -= 8;
+
+         /* the (empty) blocks listed after 'in' move up as well */
+         for (++j; j < pc->num_blocks; ++j)
+            pc->bb_list[j]->bin_pos -= 8;
+
+         nv_nvi_delete(in->exit);
+      }
+      b->bin_pos = in->bin_pos + in->bin_size;
+   }
+
+   pc->bb_list[pc->num_blocks++] = b;
+
+   /* visit node */
+
+   for (nvi = b->entry; nvi; nvi = next) {
+      next = nvi->next;
+      if (nvi_isnop(nvi))
+         nv_nvi_delete(nvi);
+   }
+
+   /* n32 counts the short instructions seen since the last long one; an
+    * odd count means the preceding short instruction has no partner yet.
+    */
+   for (nvi = b->entry; nvi; nvi = next) {
+      next = nvi->next;
+
+      size = nv50_inst_min_size(nvi);
+      if (nvi->next && size < 8)
+         ++n32;
+      else
+      if ((n32 & 1) && nvi->next &&
+          nv50_inst_min_size(nvi->next) == 4 &&
+          inst_commutation_legal(nvi, nvi->next)) {
+         /* pull the following short instruction in front of nvi to pair
+          * it with the pending one, then look at nvi again
+          */
+         ++n32;
+         nv_nvi_permute(nvi, nvi->next);
+         next = nvi;
+      } else {
+         nvi->is_long = 1;
+
+         /* an unpaired short predecessor is widened to the long form */
+         b->bin_size += n32 & 1;
+         if (n32 & 1)
+            nvi->prev->is_long = 1;
+         n32 = 0;
+      }
+      b->bin_size += 1 + nvi->is_long;
+   }
+
+   if (!b->entry) {
+      NV50_DBGMSG("block %p is now empty\n", b);
+   } else
+   if (!b->exit->is_long) {
+      /* the last instruction of a block has to be long */
+      assert(n32);
+      b->exit->is_long = 1;
+      b->bin_size += 1;
+
+      /* might have del'd a whole tail of instructions */
+      if (!b->exit->prev->is_long && !(n32 & 1)) {
+         b->bin_size += 1;
+         b->exit->prev->is_long = 1;
+      }
+   }
+   assert(!b->entry || (b->exit && b->exit->is_long));
+
+   /* convert the block size from words to bytes and add it to the total */
+   pc->bin_size += b->bin_size *= 4;
+}
+
+/* Final pass before emission: flattens the control flow where possible,
+ * then allocates the block list and runs nv_pc_pass_pre_emission on every
+ * block in order.
+ *
+ * Returns 0 on success and -1 if the block list could not be allocated.
+ */
+int
+nv_pc_exec_pass2(struct nv_pc *pc)
+{
+   struct nv_pass pass;
+
+   pass.pc = pc;
+   pass.n = 0;
+   pass.priv = NULL;
+
+   pc->pass_seq++;
+   nv_pass_flatten(&pass, pc->root);
+
+   NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);
+
+   pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
+   /* the pre-emission callback stores into bb_list unconditionally */
+   if (!pc->bb_list)
+      return -1;
+
+   /* num_blocks is used as fill level of bb_list by the callback */
+   pc->num_blocks = 0;
+
+   nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
+
+   return 0;
+}
+
+/* TRUE if @nvi is a load (LDA) whose source is in one of the register
+ * files NV_FILE_MEM_C(0) .. NV_FILE_MEM_C(15).
+ *
+ * @nvi may be NULL (a value without defining instruction), in which case
+ * the result is FALSE.
+ */
+static INLINE boolean
+is_cmem_load(struct nv_instruction *nvi)
+{
+   return (nvi &&
+          nvi->opcode == NV_OP_LDA &&
+          nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
+          nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
+}
+
+/* TRUE if @nvi is a load (LDA) whose source file is NV_FILE_MEM_S or
+ * compares less than or equal to NV_FILE_MEM_P.
+ *
+ * @nvi may be NULL (a value without defining instruction), in which case
+ * the result is FALSE.
+ *
+ * NOTE(review): '<= NV_FILE_MEM_P' looks like it was meant to be
+ * '== NV_FILE_MEM_P'; kept as is because the file enumeration is not
+ * visible here - confirm against the NV_FILE_* definitions.
+ */
+static INLINE boolean
+is_smem_load(struct nv_instruction *nvi)
+{
+   return (nvi &&
+          nvi->opcode == NV_OP_LDA &&
+          (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
+           nvi->src[0]->value->reg.file <= NV_FILE_MEM_P));
+}
+
+/* TRUE if @nvi is a MOV whose source is an immediate. @nvi must not be
+ * NULL.
+ */
+static INLINE boolean
+is_immd_move(struct nv_instruction *nvi)
+{
+   if (nvi->opcode != NV_OP_MOV)
+      return FALSE;
+
+   return nvi->src[0]->value->reg.file == NV_FILE_IMM;
+}
+
+/* For commutative operations, reorder the first two sources so that loads
+ * end up in the operand slot preferred here: a c[] load is moved to
+ * source 1, an s[] load is moved to source 0. Nothing is done if source 1
+ * is an immediate.
+ *
+ * If the sources of a SET were swapped, the condition code is adjusted via
+ * cc_swapped, which exchanges bit 0 and bit 2 of the code.
+ */
+static INLINE void
+check_swap_src_0_1(struct nv_instruction *nvi)
+{
+   static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+
+   struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];
+
+   if (!nv_op_commutative(nvi->opcode))
+      return;
+   assert(src0 && src1);
+
+   if (src1->value->reg.file == NV_FILE_IMM)
+      return;
+
+   /* NOTE(review): value->insn is passed on unchecked; other passes in
+    * this file treat it as possibly NULL.
+    */
+   if (is_cmem_load(src0->value->insn)) {
+      if (!is_cmem_load(src1->value->insn)) {
+         nvi->src[0] = src1;
+         nvi->src[1] = src0;
+         /* debug_printf("swapping cmem load to 1\n"); */
+      }
+   } else
+   if (is_smem_load(src1->value->insn)) {
+      if (!is_smem_load(src0->value->insn)) {
+         nvi->src[0] = src1;
+         nvi->src[1] = src0;
+         /* debug_printf("swapping smem load to 0\n"); */
+      }
+   }
+
+   /* src[0] != src0 means one of the swaps above happened */
+   if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
+      nvi->set_cond = cc_swapped[nvi->set_cond];
+}
+
+/* Fold stores to output registers into the instruction that computes the
+ * stored value.
+ *
+ * For each MOV / STA with a destination in NV_FILE_OUT, the defining
+ * instruction of the source writes the output directly and the store is
+ * deleted, provided that instruction is not a PHI or vector op, its result
+ * has no other user and it has no immediate source.
+ * Recurses into unvisited successor blocks; always returns 0.
+ */
+static int
+nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *nvi, *sti, *next;
+   int j;
+
+   for (sti = b->entry; sti; sti = next) {
+      next = sti->next;
+
+      /* only handling MOV to $oX here */
+      if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
+         continue;
+      if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
+         continue;
+
+      nvi = sti->src[0]->value->insn;
+      if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode))
+         continue;
+      assert(nvi->def[0] == sti->src[0]->value);
+
+      /* the value is needed in a register by someone else */
+      if (nvi->def[0]->refc > 1)
+         continue;
+
+      /* cannot write to $oX when using immediate */
+      for (j = 0; j < 4 && nvi->src[j]; ++j)
+         if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
+            break;
+      if (j < 4 && nvi->src[j])
+         continue;
+
+      /* NOTE(review): the output value's 'insn' back pointer still refers
+       * to the deleted store afterwards - confirm nothing relies on it.
+       */
+      nvi->def[0] = sti->def[0];
+      nvi->fixed = sti->fixed;
+
+      nv_nvi_delete(sti);
+   }
+   DESCEND_ARBITRARY(j, nv_pass_fold_stores);
+
+   return 0;
+}
+
+/* Fold loads and immediate moves into the instructions that use them.
+ *
+ * For each of the first three sources of an instruction: if it is defined
+ * by a MOV of an immediate and the instruction accepts an immediate in that
+ * slot, the immediate is referenced directly; if it is defined by an LDA
+ * and the instruction can load from that location, the memory value (and
+ * the load's src[4] operand, if any) is referenced directly. A load that is
+ * left without users is deleted.
+ * Recurses into unvisited successor blocks; always returns 0.
+ */
+static int
+nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *nvi, *ld;
+   int j;
+
+   for (nvi = b->entry; nvi; nvi = nvi->next) {
+      /* put loads into the operand slot where they can be folded */
+      check_swap_src_0_1(nvi);
+
+      for (j = 0; j < 3; ++j) {
+         if (!nvi->src[j])
+            break;
+         ld = nvi->src[j]->value->insn;
+         if (!ld)
+            continue;
+
+         if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
+            nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
+            continue;
+         }
+
+         if (ld->opcode != NV_OP_LDA)
+            continue;
+         if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
+            continue;
+
+         if (j == 0 && ld->src[4]) /* can't load shared mem */
+            continue;
+
+         /* fold it ! */ /* XXX: ref->insn */
+         nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
+         if (ld->src[4])
+            nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
+
+         if (!nv_nvi_refcount(ld))
+            nv_nvi_delete(ld);
+      }
+   }
+   DESCEND_ARBITRARY(j, nv_pass_fold_loads);
+
+   return 0;
+}
+
+/* Turn NEG / ABS instructions into source modifiers of their users, SUB
+ * into ADD with a negated second source, and fold SAT into a preceding MAD.
+ *
+ * A NEG / ABS producer is only folded if its result has a single user, it
+ * carries no source modifier of its own, and the user supports the
+ * resulting modifier in that source slot. Special cases:
+ *  - the user takes the absolute value anyway: the producer is bypassed,
+ *  - NEG of NEG: the user becomes a MOV of the original value.
+ *
+ * SAT is folded into the MAD that defines its source if the MAD result has
+ * no other user, the MAD defines no flags and the SAT source carries no
+ * modifier.
+ *
+ * Recurses into unvisited successor blocks; always returns 0.
+ */
+static int
+nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   int j;
+   struct nv_instruction *nvi, *mi, *next;
+   ubyte mod;
+
+   for (nvi = b->entry; nvi; nvi = next) {
+      next = nvi->next;
+      if (nvi->opcode == NV_OP_SUB) {
+         nvi->opcode = NV_OP_ADD;
+         nvi->src[1]->mod ^= NV_MOD_NEG;
+      }
+
+      /* should not put any modifiers on NEG and ABS */
+      assert(nvi->opcode != NV_OP_NEG || !nvi->src[0]->mod);
+      assert(nvi->opcode != NV_OP_ABS || !nvi->src[0]->mod);
+
+      for (j = 0; j < 4; ++j) {
+         if (!nvi->src[j])
+            break;
+
+         mi = nvi->src[j]->value->insn;
+         if (!mi)
+            continue;
+         if (mi->def[0]->refc > 1)
+            continue;
+
+         if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
+         else
+         if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
+         else
+            continue;
+
+         /* The producer may have received a modifier earlier in this
+          * pass; bypassing it would silently drop that modifier.
+          */
+         if (mi->src[0]->mod)
+            continue;
+
+         if (nvi->opcode == NV_OP_ABS || (nvi->src[j]->mod & NV_MOD_ABS)) {
+            /* abs(neg(x)) == abs(abs(x)) == abs(x) */
+            mod = 0;
+         } else
+         if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
+            /* neg(neg(x)) == x */
+            nvi->opcode = NV_OP_MOV;
+            mod = 0;
+         }
+
+         /* Every requested modifier bit has to be supported. Comparing
+          * against mod (instead of testing for non-zero) lets the mod == 0
+          * cases above proceed to rebind the source.
+          */
+         if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod)
+            continue;
+
+         nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
+
+         nvi->src[j]->mod ^= mod;
+      }
+
+      if (nvi->opcode == NV_OP_SAT) {
+         mi = nvi->src[0]->value->insn;
+
+         if (mi && mi->opcode == NV_OP_MAD &&
+             !mi->flags_def &&
+             !nvi->src[0]->mod &&          /* sat(-mad) is not mad.sat */
+             !(mi->def[0]->refc > 1)) {    /* other users need the raw MAD */
+            mi->saturate = 1;
+            mi->def[0] = nvi->def[0];
+            /* keep value->insn pointing at the instruction defining it */
+            mi->def[0]->insn = mi;
+            nv_nvi_delete(nvi);
+         }
+      }
+   }
+   DESCEND_ARBITRARY(j, nv_pass_lower_mods);
+
+   return 0;
+}
+
+/* TRUE if value s is defined by a MUL instruction. */
+#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
+
+/* Apply source modifiers to a 32 bit constant.
+ *
+ * @val:  constant, modified in place
+ * @type: NV_TYPE_F32 operates on the IEEE sign bit, every other type is
+ *        treated as two's complement integer
+ * @mod:  NV_MOD_ABS and / or NV_MOD_NEG; ABS is applied before NEG
+ */
+static void
+modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
+{
+   if (mod & NV_MOD_ABS) {
+      if (type == NV_TYPE_F32)
+         *val &= 0x7fffffff;
+      else
+      if ((*val) & 0x80000000u) /* unsigned: 1 << 31 overflows an int */
+         *val = ~(*val) + 1;
+   }
+   if (mod & NV_MOD_NEG) {
+      if (type == NV_TYPE_F32)
+         *val ^= 0x80000000;
+      else
+         *val = ~(*val) + 1;
+   }
+}
+
+/* Map a source modifier to the opcode that has the same effect when applied
+ * to an unmodified source: no modifier is a MOV, NEG and ABS map to the
+ * instructions of the same name. Any other value (e.g. a combination)
+ * yields NV_OP_NOP, meaning "no single opcode".
+ */
+static INLINE uint
+modifiers_opcode(ubyte mod)
+{
+   if (mod == 0)
+      return NV_OP_MOV;
+   if (mod == NV_MOD_NEG)
+      return NV_OP_NEG;
+   if (mod == NV_MOD_ABS)
+      return NV_OP_ABS;
+
+   return NV_OP_NOP;
+}
+
+/* Fold an operation on two immediates at compile time.
+ *
+ * @pc:   program context, used to allocate the resulting immediate
+ * @nvi:  instruction whose first two sources are the immediates
+ * @src0: immediate value behind nvi->src[0]
+ * @src1: immediate value behind nvi->src[1]
+ *
+ * MUL, ADD and SUB become a MOV of the result. For MAD only the product is
+ * folded and the instruction becomes an ADD of the third source and the
+ * constant; this is only done if the third source is a GPR. Other opcodes
+ * and instructions without result are left untouched.
+ */
+static void
+constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
+                    struct nv_value *src0, struct nv_value *src1)
+{
+   struct nv_value *val;
+   union {
+      float f32;
+      uint32_t u32;
+      int32_t s32;
+   } u0, u1, u;
+   ubyte type;
+
+   if (!nvi->def[0])
+      return;
+   type = nvi->def[0]->reg.type;
+
+   u.u32 = 0;
+   u0.u32 = src0->reg.imm.u32;
+   u1.u32 = src1->reg.imm.u32;
+
+   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
+   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);
+
+   /* Integer results are computed with unsigned arithmetic: the low 32
+    * bits are the same as for the signed operation, and overflow is well
+    * defined.
+    */
+   switch (nvi->opcode) {
+   case NV_OP_MAD:
+      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
+         return;
+      /* fall through */
+   case NV_OP_MUL:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
+      case NV_TYPE_U32:
+      case NV_TYPE_S32: u.u32 = u0.u32 * u1.u32; break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+   case NV_OP_ADD:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
+      case NV_TYPE_U32:
+      case NV_TYPE_S32: u.u32 = u0.u32 + u1.u32; break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+   case NV_OP_SUB:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
+      case NV_TYPE_U32:
+      case NV_TYPE_S32: u.u32 = u0.u32 - u1.u32; break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+   default:
+      return;
+   }
+
+   nvi->opcode = NV_OP_MOV;
+
+   val = new_value(pc, NV_FILE_IMM, type);
+
+   val->reg.imm.u32 = u.u32;
+
+   nv_reference(pc, &nvi->src[1], NULL);
+   nv_reference(pc, &nvi->src[0], val);
+   /* the modifier of the old source is already part of the constant */
+   nvi->src[0]->mod = 0;
+
+   if (nvi->src[2]) { /* from MAD */
+      nvi->src[1] = nvi->src[0];
+      nvi->src[0] = nvi->src[2];
+      nvi->src[2] = NULL;
+      nvi->opcode = NV_OP_ADD;
+   }
+}
+
+/* Simplify an operation that has exactly one immediate operand.
+ *
+ * @pc:  program context
+ * @nvi: instruction to simplify
+ * @val: the immediate value
+ * @s:   index (0 or 1) of the source holding the immediate
+ *
+ * Handled cases (x being the other operand including its modifier):
+ *   x * 1, x + 0   -> MOV / NEG / ABS of the plain value
+ *   x * 2, x * -2  -> x + x, -x + -x
+ *   x * -1         -> negation of x
+ *   x * 0          -> MOV of the zero immediate
+ *   RCP / RSQ of a constant -> MOV of the computed constant
+ *
+ * Whenever a source modifier is converted into an opcode, the modifier is
+ * removed from the operand; emit_cvt applies both the opcode and the
+ * modifier, so keeping it would apply it twice.
+ *
+ * If the result is a MOV but the instruction has to define flags, a CVT
+ * reading the result is inserted to produce them.
+ */
+static void
+constant_operand(struct nv_pc *pc,
+                 struct nv_instruction *nvi, struct nv_value *val, int s)
+{
+   union {
+      float f32;
+      uint32_t u32;
+      int32_t s32;
+   } u;
+   int t = s ? 0 : 1;
+   uint op;
+   ubyte type;
+   ubyte mod;
+
+   if (!nvi->def[0])
+      return;
+   type = nvi->def[0]->reg.type;
+
+   u.u32 = val->reg.imm.u32;
+   modifiers_apply(&u.u32, type, nvi->src[s]->mod);
+
+   switch (nvi->opcode) {
+   case NV_OP_MUL:
+      if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
+          (NV_TYPE_ISINT(type) && u.u32 == 1)) {
+         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+            break;
+         nvi->opcode = op;
+         nv_reference(pc, &nvi->src[s], NULL);
+         nvi->src[0] = nvi->src[t];
+         nvi->src[1] = NULL;
+         nvi->src[0]->mod = 0; /* now expressed by the opcode */
+      } else
+      if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
+          (NV_TYPE_ISINT(type) && u.u32 == 2)) {
+         nvi->opcode = NV_OP_ADD;
+         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
+         nvi->src[s]->mod = nvi->src[t]->mod;
+      } else
+      if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
+         /* modifier of the result: the operand's one with NEG toggled */
+         mod = nvi->src[t]->mod ^ NV_MOD_NEG;
+
+         if (mod & NV_MOD_NEG) {
+            /* negation as opcode, anything else stays a modifier */
+            nvi->opcode = NV_OP_NEG;
+            mod &= ~NV_MOD_NEG;
+         } else {
+            if ((op = modifiers_opcode(mod)) == NV_OP_NOP)
+               break;
+            nvi->opcode = op;
+            mod = 0;
+         }
+         nv_reference(pc, &nvi->src[s], NULL);
+         nvi->src[0] = nvi->src[t];
+         nvi->src[1] = NULL;
+         nvi->src[0]->mod = mod;
+      } else
+      if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
+         nvi->opcode = NV_OP_ADD;
+         nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
+         nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
+      } else
+      if (u.u32 == 0) {
+         /* keep only the zero immediate and move it to source 0 */
+         nvi->opcode = NV_OP_MOV;
+         nv_reference(pc, &nvi->src[t], NULL);
+         if (s) {
+            nvi->src[0] = nvi->src[1];
+            nvi->src[1] = NULL;
+         }
+      }
+      break;
+   case NV_OP_ADD:
+      if (u.u32 == 0) {
+         if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+            break;
+         nvi->opcode = op;
+         nv_reference(pc, &nvi->src[s], NULL);
+         nvi->src[0] = nvi->src[t];
+         nvi->src[1] = NULL;
+         nvi->src[0]->mod = 0; /* now expressed by the opcode */
+      }
+      break;
+   case NV_OP_RCP:
+      u.f32 = 1.0f / u.f32;
+      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+      nvi->opcode = NV_OP_MOV;
+      assert(s == 0);
+      nv_reference(pc, &nvi->src[0], val);
+      nvi->src[0]->mod = 0; /* already folded into the constant */
+      break;
+   case NV_OP_RSQ:
+      u.f32 = 1.0f / sqrtf(u.f32);
+      (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+      nvi->opcode = NV_OP_MOV;
+      assert(s == 0);
+      nv_reference(pc, &nvi->src[0], val);
+      nvi->src[0]->mod = 0; /* already folded into the constant */
+      break;
+   default:
+      break;
+   }
+
+   /* a MOV does not set flags: let a CVT of the result define them */
+   if (nvi->opcode == NV_OP_MOV && nvi->flags_def) {
+      struct nv_instruction *cvt = new_instruction_at(pc, nvi, NV_OP_CVT);
+
+      nv_reference(pc, &cvt->src[0], nvi->def[0]);
+
+      cvt->flags_def = nvi->flags_def;
+      nvi->flags_def = NULL;
+   }
+}
+
+/* Arithmetic simplification pass.
+ *
+ * For every instruction, operations on immediates are folded
+ * (constant_expression / constant_operand). Afterwards an ADD that has the
+ * result of a MUL as one of its operands is turned into a MAD, provided the
+ * product has no other user and the ADD applies at most a negation to it.
+ * The MUL itself is not removed here.
+ *
+ * Recurses into unvisited successor blocks; always returns 0.
+ */
+static int
+nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *nvi, *next;
+   int j;
+
+   for (nvi = b->entry; nvi; nvi = next) {
+      struct nv_value *src0, *src1, *src;
+      int mod;
+      int s; /* index of the ADD operand that is the product */
+
+      next = nvi->next;
+
+      src0 = nvcg_find_immediate(nvi->src[0]);
+      src1 = nvcg_find_immediate(nvi->src[1]);
+
+      if (src0 && src1)
+         constant_expression(ctx->pc, nvi, src0, src1);
+      else {
+         if (src0)
+            constant_operand(ctx->pc, nvi, src0, 0);
+         else
+         if (src1)
+            constant_operand(ctx->pc, nvi, src1, 1);
+      }
+
+      /* try to combine MUL, ADD into MAD */
+      if (nvi->opcode != NV_OP_ADD)
+         continue;
+
+      src0 = nvi->src[0]->value;
+      src1 = nvi->src[1]->value;
+
+      if (SRC_IS_MUL(src0) && src0->refc == 1)
+         src = src0;
+      else
+      if (SRC_IS_MUL(src1) && src1->refc == 1)
+         src = src1;
+      else
+         continue;
+
+      s = (src == src0) ? 0 : 1;
+      mod = nvi->src[s]->mod;
+
+      /* Only a negation of the product can be moved into the MAD, e.g.
+       * |a * b| + c has no MAD equivalent. Check before touching nvi.
+       */
+      if (mod & ~NV_MOD_NEG)
+         continue;
+
+      nvi->opcode = NV_OP_MAD;
+      nv_reference(ctx->pc, &nvi->src[s], NULL);
+      nvi->src[2] = nvi->src[s ? 0 : 1];
+
+      /* take over the factors; a negated product negates one factor */
+      nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
+      nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
+      nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
+      nvi->src[1]->mod = src->insn->src[1]->mod;
+   }
+   DESCEND_ARBITRARY(j, nv_pass_lower_arith);
+
+   return 0;
+}
+
+/* TODO: redundant store elimination */
+
+/* One remembered load: 'data' identifies what was loaded (register id of
+ * the memory location, or the immediate bits) and 'value' is the value that
+ * currently holds it. Records of the same kind form a singly linked list.
+ */
+struct load_record {
+   struct load_record *next;
+   uint64_t data;
+   struct nv_value *value;
+};
+
+/* maximum number of loads tracked per basic block */
+#define LOAD_RECORD_POOL_SIZE 1024
+
+/* Context of the reload elimination pass: one record list per kind of
+ * source (immediates, s[], interpolated inputs, the 16 c[] files, l[]) and
+ * the pool the records are taken from ('alloc' is the number in use).
+ */
+struct nv_pass_reld_elim {
+   struct nv_pc *pc;
+
+   struct load_record *imm;
+   struct load_record *mem_s;
+   struct load_record *mem_v;
+   struct load_record *mem_c[16];
+   struct load_record *mem_l;
+
+   struct load_record pool[LOAD_RECORD_POOL_SIZE];
+   int alloc;
+};
+
+/* Remove redundant loads within a basic block.
+ *
+ * Interpolations, LDA from c[], s[] and l[], and MOVs of immediates are
+ * recorded by what they load. When the same thing is loaded again, the
+ * later result is replaced by the recorded value, unless the later load
+ * already has a register assigned (then it becomes the recorded value) or
+ * is marked fixed.
+ *
+ * Loads with an address operand (src[4]) are not tracked: the record key
+ * only covers the static location, so two loads with different addresses
+ * would otherwise be treated as identical.
+ *
+ * All records are dropped at the end of the block, then the pass recurses
+ * into unvisited successor blocks. Always returns 0.
+ */
+static int
+nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+   struct load_record **rec, *it;
+   struct nv_instruction *ld, *next;
+   uint64_t data = 0;
+   struct nv_value *val;
+   int j;
+
+   for (ld = b->entry; ld; ld = next) {
+      next = ld->next;
+      if (!ld->src[0])
+         continue;
+      val = ld->src[0]->value;
+      rec = NULL;
+
+      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
+         data = val->reg.id;
+         rec = &ctx->mem_v;
+      } else
+      if (ld->opcode == NV_OP_LDA) {
+         /* indirectly addressed: location not known statically */
+         if (ld->src[4])
+            continue;
+
+         data = val->reg.id;
+         if (val->reg.file >= NV_FILE_MEM_C(0) &&
+             val->reg.file <= NV_FILE_MEM_C(15))
+            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
+         else
+         if (val->reg.file == NV_FILE_MEM_S)
+            rec = &ctx->mem_s;
+         else
+         if (val->reg.file == NV_FILE_MEM_L)
+            rec = &ctx->mem_l;
+      } else
+      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
+         data = val->reg.imm.u32;
+         rec = &ctx->imm;
+      }
+
+      /* not a tracked kind of load, or the result is unused */
+      if (!rec || !ld->def[0]->refc)
+         continue;
+
+      for (it = *rec; it; it = it->next)
+         if (it->data == data)
+            break;
+
+      if (it) {
+         if (ld->def[0]->reg.id >= 0)
+            it->value = ld->def[0];
+         else
+         if (!ld->fixed)
+            nvcg_replace_value(ctx->pc, ld->def[0], it->value);
+      } else {
+         /* pool exhausted: simply stop recording new loads */
+         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
+            continue;
+         it = &ctx->pool[ctx->alloc++];
+         it->next = *rec;
+         it->data = data;
+         it->value = ld->def[0];
+         *rec = it;
+      }
+   }
+
+   /* records are only valid within one block */
+   ctx->imm = NULL;
+   ctx->mem_s = NULL;
+   ctx->mem_v = NULL;
+   for (j = 0; j < 16; ++j)
+      ctx->mem_c[j] = NULL;
+   ctx->mem_l = NULL;
+   ctx->alloc = 0;
+
+   DESCEND_ARBITRARY(j, nv_pass_reload_elim);
+
+   return 0;
+}
+
+/* Compute the result mask of all vector (texture) instructions.
+ *
+ * Walks the program's whole instruction array rather than block @b, which
+ * is not used. For each vector op, bit c of tex_mask is set if result c is
+ * referenced, and the definitions are reordered so that the referenced ones
+ * come first (both groups keep their relative order).
+ * Always returns 0.
+ */
+static int
+nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   int i, c, j;
+
+   for (i = 0; i < ctx->pc->num_instructions; ++i) {
+      struct nv_instruction *nvi = &ctx->pc->instructions[i];
+      struct nv_value *def[4];
+
+      if (!nv_is_vector_op(nvi->opcode))
+         continue;
+      nvi->tex_mask = 0;
+
+      /* NOTE(review): assumes vector ops always have all 4 definitions */
+      for (c = 0; c < 4; ++c) {
+         if (nvi->def[c]->refc)
+            nvi->tex_mask |= 1 << c;
+         def[c] = nvi->def[c];
+      }
+
+      /* used results first, unused ones after them */
+      j = 0;
+      for (c = 0; c < 4; ++c)
+         if (nvi->tex_mask & (1 << c))
+            nvi->def[j++] = def[c];
+      for (c = 0; c < 4; ++c)
+         if (!(nvi->tex_mask & (1 << c)))
+           nvi->def[j++] = def[c];
+      assert(j == 4);
+   }
+   return 0;
+}
+
+/* Context handed to the nv_pass_dce() function. */
+struct nv_pass_dce {
+   struct nv_pc *pc;
+   uint removed; /* incremented by nv_pass_dce() for every deleted instruction */
+};
+
+/* Dead code elimination for one basic block: every instruction (phis
+ * included) accepted by inst_cullable() is deleted and counted in
+ * ctx->removed. DESCEND_ARBITRARY then continues the pass (presumably on
+ * the successor blocks -- the macro is defined elsewhere). Returns 0.
+ */
+static int
+nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *insn, *succ;
+   int j;
+
+   insn = b->phi ? b->phi : b->entry;
+   while (insn) {
+      succ = insn->next; /* fetch the successor before a possible deletion */
+
+      if (inst_cullable(insn)) {
+         nv_nvi_delete(insn);
+         ctx->removed++;
+      }
+      insn = succ;
+   }
+
+   DESCEND_ARBITRARY(j, nv_pass_dce);
+
+   return 0;
+}
+
+/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
+ *
+ * Returns TRUE if @bb starts an IF/ELSE/ENDIF clause, i.e. both successors
+ * exist and lead to the same block, or if it is an IF with BREAK (first
+ * successor leaves a loop through its edge 0) and a dummy ELSE block.
+ */
+static INLINE boolean
+bb_is_if_else_endif(struct nv_basic_block *bb)
+{
+   struct nv_basic_block *first = bb->out[0];
+   struct nv_basic_block *second = bb->out[1];
+
+   if (!first || !second)
+      return FALSE;
+
+   /* in both shapes the second successor must have a single way out */
+   if (second->out[1])
+      return FALSE;
+
+   if (first->out_kind[0] == CFG_EDGE_LOOP_LEAVE)
+      return first->out[1] == second->out[0];
+
+   return first->out[0] == second->out[0] && !first->out[1];
+}
+
+/* Predicate all instructions of @b on flags value @p with condition code
+ * @cc, and remove the branch at the end of the block if there is one.
+ * Instructions for which nvi_isnop() holds are left untouched.
+ */
+static void
+predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
+                       struct nv_value *p, ubyte cc)
+{
+   struct nv_instruction *insn, *succ;
+
+   for (insn = b->entry; insn; insn = succ) {
+      succ = insn->next;
+
+      /* only the last instruction is checked for being the branch */
+      if (!succ && insn->opcode == NV_OP_BRA) {
+         nv_nvi_delete(insn);
+         break;
+      }
+      if (nvi_isnop(insn))
+         continue;
+
+      insn->cc = cc;
+      nv_reference(pc, &insn->flags_src, p);
+   }
+}
+
+/* Flatten small IF/ELSE/ENDIF constructs into predicated straight-line code.
+ *
+ * If @b starts such a construct (see bb_is_if_else_endif) and every
+ * instruction of both branches can be predicated, the branch instructions
+ * are predicated on the flags value used by b->exit, and the control flow
+ * instructions (BRA, JOINAT, JOIN) are deleted. Returns 0.
+ *
+ * NOTE: Run this after register allocation, we can just cut out the cflow
+ * instructions and hook the predicates to the conditional OPs if they are
+ * not using immediates; better than inserting SELECT to join definitions.
+ *
+ * NOTE: Should adapt prior optimization to make this possible more often.
+ */
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *nvi;
+   struct nv_value *pred;
+   int i;
+   int n0 = 0, n1 = 0;
+
+   if (bb_is_if_else_endif(b)) {
+
+      NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);
+
+      /* count instructions in the first branch; nvi ends up non-NULL at the
+       * first instruction that cannot be predicated
+       */
+      for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
+         if (!nv50_nvi_can_predicate(nvi))
+            break;
+      if (!nvi) {
+         /* first branch is fine, check the second one the same way */
+         for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
+            if (!nv50_nvi_can_predicate(nvi))
+               break;
+#ifdef NV50_PC_DEBUG
+         /* the debug-only code adds an else branch to the if above */
+         if (nvi) {
+            debug_printf("cannot predicate: "); nv_print_instruction(nvi);
+         }
+      } else {
+         debug_printf("cannot predicate: "); nv_print_instruction(nvi);
+#endif
+      }
+
+      /* nvi == NULL here means both branches are fully predicable */
+      if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
+         assert(b->exit && b->exit->flags_src);
+         pred = b->exit->flags_src->value;
+
+         /* the two branches get the given, different condition codes */
+         predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
+         predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);
+
+         assert(b->exit && b->exit->opcode == NV_OP_BRA);
+         nv_nvi_delete(b->exit);
+
+         /* NOTE(review): b->exit is re-read here, so nv_nvi_delete is
+          * presumably expected to update it to the preceding instruction --
+          * confirm against its definition.
+          */
+         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
+            nv_nvi_delete(b->exit);
+
+         /* pick the out edge of the first branch that leads to the block
+          * following the construct (edge 1 if edge 0 leaves a loop)
+          */
+         i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
+
+         /* the join point is no longer needed */
+         if ((nvi = b->out[0]->out[i]->entry)) {
+            nvi->is_join = 0;
+            if (nvi->opcode == NV_OP_JOIN)
+               nv_nvi_delete(nvi);
+         }
+      }
+   }
+   DESCEND_ARBITRARY(i, nv_pass_flatten);
+
+   return 0;
+}
+
+/* Local common subexpression elimination, stupid O(n^2) implementation.
+ *
+ * Within block @b, every instruction @ir is compared against each earlier
+ * instruction @ik. If both have the same opcode, equal result values
+ * (values_equal) and the same first three sources (same modifier, and same
+ * value or same allocated register), @ir is deleted and all uses of its
+ * result are redirected to the result of @ik. The scan repeats until no
+ * more instructions were removed. Returns 0.
+ */
+static int
+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *ir, *ik, *next;
+   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
+   int s;
+   unsigned int reps;
+
+   do {
+      reps = 0;
+      for (ir = entry; ir; ir = next) {
+         next = ir->next; /* ir may get deleted below */
+         for (ik = entry; ik != ir; ik = ik->next) {
+            if (ir->opcode != ik->opcode || ir->fixed)
+               continue;
+
+            if (!ir->def[0] || !ik->def[0] ||
+                ik->opcode == NV_OP_LDA ||
+                ik->opcode == NV_OP_STA ||
+                ik->opcode == NV_OP_MOV ||
+                nv_is_vector_op(ik->opcode))
+               continue; /* ignore loads, stores & moves */
+
+            if (ik->src[4] || ir->src[4])
+               continue; /* don't mess with address registers */
+
+            if (ik->flags_src || ir->flags_src ||
+                ik->flags_def || ir->flags_def)
+               continue; /* and also not with flags, for now */
+
+            if (ik->def[0]->reg.file == NV_FILE_OUT ||
+                ir->def[0]->reg.file == NV_FILE_OUT ||
+                !values_equal(ik->def[0], ir->def[0]))
+               continue;
+
+            /* compare sources; s reaches 3 only if all of them match */
+            for (s = 0; s < 3; ++s) {
+               struct nv_value *va, *vb;
+
+               if (!ik->src[s]) {
+                  if (ir->src[s])
+                     break;
+                  continue;
+               }
+               /* ik has this source but ir does not: not the same
+                * expression, and ir->src[s] must not be dereferenced
+                */
+               if (!ir->src[s])
+                  break;
+               if (ik->src[s]->mod != ir->src[s]->mod)
+                  break;
+               va = ik->src[s]->value;
+               vb = ir->src[s]->value;
+               if (va == vb)
+                  continue;
+               if (va->reg.file != vb->reg.file ||
+                   va->reg.id < 0 ||
+                   va->reg.id != vb->reg.id)
+                  break;
+            }
+            if (s == 3) {
+               /* NOTE(review): ir->def[0] is read after nv_nvi_delete(ir);
+                * this presumes deletion only unlinks the instruction and
+                * keeps its storage valid -- confirm.
+                */
+               nv_nvi_delete(ir);
+               ++reps;
+               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
+               break;
+            }
+         }
+      }
+   } while(reps);
+
+   DESCEND_ARBITRARY(s, nv_pass_cse);
+
+   return 0;
+}
+
+/* Run the optimization passes of this file on program @pc, in this order:
+ * lower_arith, fold_loads, fold_stores, reload_elim, cse, lower_mods,
+ * dce (repeated until nothing is removed) and tex_mask.
+ *
+ * pc->pass_seq is incremented before each block-walking pass.
+ *
+ * Returns 0 on success. On failure returns the non-zero result of the
+ * failing pass, or -1 if the reload elimination context cannot be allocated.
+ */
+int
+nv_pc_exec_pass0(struct nv_pc *pc)
+{
+   struct nv_pass_reld_elim *reldelim;
+   struct nv_pass pass;
+   struct nv_pass_dce dce;
+   int ret;
+
+   pass.n = 0;
+   pass.pc = pc;
+
+   /* Do this first, so we don't have to pay attention
+    * to whether sources are supported memory loads.
+    */
+   pc->pass_seq++;
+   ret = nv_pass_lower_arith(&pass, pc->root);
+   if (ret)
+      return ret;
+
+   pc->pass_seq++;
+   ret = nv_pass_fold_loads(&pass, pc->root);
+   if (ret)
+      return ret;
+
+   pc->pass_seq++;
+   ret = nv_pass_fold_stores(&pass, pc->root);
+   if (ret)
+      return ret;
+
+   /* the reload elimination context is heap allocated for this pass only */
+   reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
+   if (!reldelim)
+      return -1; /* out of memory; callers only test for non-zero */
+   reldelim->pc = pc;
+   pc->pass_seq++;
+   ret = nv_pass_reload_elim(reldelim, pc->root);
+   FREE(reldelim);
+   if (ret)
+      return ret;
+
+   pc->pass_seq++;
+   ret = nv_pass_cse(&pass, pc->root);
+   if (ret)
+      return ret;
+
+   pc->pass_seq++;
+   ret = nv_pass_lower_mods(&pass, pc->root);
+   if (ret)
+      return ret;
+
+   /* removing an instruction can make others dead: iterate to a fixpoint */
+   dce.pc = pc;
+   do {
+      dce.removed = 0;
+      pc->pass_seq++;
+      ret = nv_pass_dce(&dce, pc->root);
+      if (ret)
+         return ret;
+   } while (dce.removed);
+
+   return nv_pass_tex_mask(&pass, pc->root);
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc_print.c b/src/gallium/drivers/nv50/nv50_pc_print.c
new file mode 100644 (file)
index 0000000..01a6f00
--- /dev/null
@@ -0,0 +1,315 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nv50_pc.h"
+
+#define NVXX_DEBUG 0
+
+/* Shorthand for debug_printf, used by the printing helpers in this file. */
+#define PRINT(args...) debug_printf(args)
+
+/* Number of elements of a true array (not of a pointer). The argument is
+ * parenthesized so that any array-valued expression can be passed.
+ */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
+/* ANSI terminal escape sequences used to colourize the printed output. */
+static const char *norm = "\x1b[00m"; /* reset attributes */
+static const char *gree = "\x1b[32m"; /* green */
+static const char *blue = "\x1b[34m"; /* blue */
+static const char *cyan = "\x1b[36m"; /* cyan */
+static const char *orng = "\x1b[33m"; /* SGR 33, i.e. yellow on most terminals */
+static const char *mgta = "\x1b[35m"; /* magenta */
+
+/* Printable opcode names, indexed by opcode value in nv_opcode_name().
+ * The final "BAD_OP" entry is what nv_opcode_name() returns for any opcode
+ * beyond the table.
+ * NOTE(review): the order presumably mirrors the NV_OP_* enumeration, which
+ * is declared elsewhere -- keep both in sync.
+ */
+static const char *nv_opcode_names[NV_OP_COUNT + 1] = {
+   "phi",
+   "extract",
+   "combine",
+   "lda",
+   "sta",
+   "mov",
+   "add",
+   "sub",
+   "neg",
+   "mul",
+   "mad",
+   "cvt",
+   "sat",
+   "not",
+   "and",
+   "or",
+   "xor",
+   "shl",
+   "shr",
+   "rcp",
+   "undef",
+   "rsqrt",
+   "lg2",
+   "sin",
+   "cos",
+   "ex2",
+   "presin",
+   "preex2",
+   "min",
+   "max",
+   "set",
+   "sad",
+   "kil",
+   "bra",
+   "call",
+   "ret",
+   "break",
+   "breakaddr",
+   "joinat",
+   "tex",
+   "texbias",
+   "texlod",
+   "texfetch",
+   "texsize",
+   "dfdx",
+   "dfdy",
+   "quadop",
+   "linterp",
+   "pinterp",
+   "abs",
+   "ceil",
+   "floor",
+   "trunc",
+   "nop",
+   "select",
+   "export",
+   "join",
+   "BAD_OP"
+};
+
+/* Printable condition code names, indexed by nv_cond_name(), which clamps
+ * its argument to 19, the index of the last of these 20 entries.
+ * Entries 8..15 repeat 0..7 with a "u" suffix; entries 7 and 15 print as
+ * the empty string.
+ */
+static const char *nv_cond_names[] =
+{
+   "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "",
+   "never", "ltu", "equ", "leu", "gtu", "neu", "geu", "",
+   "o", "c", "a", "s"
+};
+
+/* Printable source modifier names, indexed by nv_modifier_string().
+ * Indices 0..7 spell out every combination of bit 0 (neg), bit 1 (abs) and
+ * bit 2 (not); index 8 is "sat" and index 9 is the "BAD_MOD" fallback.
+ *
+ * Every entry needs its own trailing comma: adjacent string literals are
+ * concatenated by the compiler, which would silently shift the following
+ * entries and shrink the table.
+ */
+static const char *nv_modifier_strings[] =
+{
+   "",
+   "neg",
+   "abs",
+   "neg abs",
+   "not",
+   "not neg",
+   "not abs",
+   "not neg abs",
+   "sat",
+   "BAD_MOD"
+};
+
+/* Return the printable name of @opcode; any value beyond the name table
+ * yields the table's last entry ("BAD_OP").
+ */
+const char *
+nv_opcode_name(uint opcode)
+{
+   const uint last = ARRAY_SIZE(nv_opcode_names) - 1;
+
+   if (opcode > last)
+      opcode = last;
+   return nv_opcode_names[opcode];
+}
+
+/* Return the printable name of value type @type, "BAD_TYPE" if unknown. */
+static INLINE const char *
+nv_type_name(ubyte type)
+{
+   const char *name = "BAD_TYPE";
+
+   switch (type) {
+   case NV_TYPE_U16: name = "u16"; break;
+   case NV_TYPE_S16: name = "s16"; break;
+   case NV_TYPE_F32: name = "f32"; break;
+   case NV_TYPE_U32: name = "u32"; break;
+   case NV_TYPE_S32: name = "s32"; break;
+   case NV_TYPE_P32: name = "p32"; break;
+   case NV_TYPE_F64: name = "f64"; break;
+   default:
+      break;
+   }
+   return name;
+}
+
+/* Return the printable name of condition code @cc; values above 19 are
+ * clamped to entry 19 of nv_cond_names.
+ */
+static INLINE const char *
+nv_cond_name(ubyte cc)
+{
+   if (cc > 19)
+      cc = 19;
+   return nv_cond_names[cc];
+}
+
+/* Return the printable name of source modifier @mod. Values beyond the
+ * name table yield its last entry ("BAD_MOD").
+ *
+ * The clamp is derived from the table itself rather than hard-coded, so the
+ * lookup can never read past the end of nv_modifier_strings.
+ */
+static INLINE const char *
+nv_modifier_string(ubyte mod)
+{
+   const uint last = ARRAY_SIZE(nv_modifier_strings) - 1;
+
+   return nv_modifier_strings[MIN2(mod, last)];
+}
+
+/* Return the number to print for @value: the register id of its join
+ * representative if that is non-negative, the value's own number otherwise.
+ */
+static INLINE int
+nv_value_id(struct nv_value *value)
+{
+   const int reg = value->join->reg.id;
+
+   return (reg >= 0) ? reg : value->n;
+}
+
+/* TRUE if @value carries a non-negative register id. */
+static INLINE boolean
+nv_value_allocated(struct nv_value *value)
+{
+   if (value->reg.id < 0)
+      return FALSE;
+   return TRUE;
+}
+
+/* Print a memory operand of the form  <c>[<buf>]'['[$a<id>+]0x<offset>']'.
+ * The buffer index is omitted when @buf is negative, the address register
+ * part when @a is NULL.
+ */
+static INLINE void
+nv_print_address(const char c, int buf, struct nv_value *a, int offset)
+{
+   if (buf < 0)
+      PRINT(" %s%c[", cyan, c);
+   else
+      PRINT(" %s%c%i[", cyan, c, buf);
+
+   if (a != NULL)
+      PRINT("%s$a%i%s+", mgta, nv_value_id(a), cyan);
+
+   PRINT("%s0x%x%s]", orng, offset, cyan);
+}
+
+/* Print the predicate of @nvi: its condition code name followed by the
+ * flags value it reads. The value is prefixed with '$' if its join
+ * representative has a register id, with '%' otherwise.
+ * @nvi->flags_src must not be NULL.
+ */
+static INLINE void
+nv_print_cond(struct nv_instruction *nvi)
+{
+   struct nv_value *flags = nvi->flags_src->value;
+   char pfx;
+
+   if (nv_value_allocated(flags->join))
+      pfx = '$';
+   else
+      pfx = '%';
+
+   PRINT("%s%s %s%cc%i ",
+         gree, nv_cond_name(nvi->cc),
+         mgta, pfx, nv_value_id(flags));
+}
+
+/* Print @value as an instruction operand.
+ *
+ * @ind:  address value to show inside memory operands, or NULL
+ * @type: type to print the value as; NV_TYPE_ANY selects the value's own
+ *
+ * Registers are printed with '$' if the join representative has a register
+ * id and with '%' otherwise; memory operands are printed as addresses with
+ * a byte offset of 4 * id; immediates are formatted according to @type.
+ * An unknown register file is reported through NOUVEAU_ERR.
+ */
+static INLINE void
+nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type)
+{
+   const int file = value->reg.file;
+   char reg_pfx;
+   int id;
+
+   if (type == NV_TYPE_ANY)
+      type = value->reg.type;
+
+   if (file != NV_FILE_FLAGS)
+      PRINT(" %s%s", gree, nv_type_name(type));
+
+   reg_pfx = nv_value_allocated(value->join) ? '$' : '%';
+   id = nv_value_id(value);
+
+   if (file == NV_FILE_GPR) {
+      PRINT(" %s%cr%i", blue, reg_pfx, id);
+   } else if (file == NV_FILE_OUT) {
+      PRINT(" %s%co%i", mgta, reg_pfx, id);
+   } else if (file == NV_FILE_ADDR) {
+      PRINT(" %s%ca%i", mgta, reg_pfx, id);
+   } else if (file == NV_FILE_FLAGS) {
+      PRINT(" %s%cc%i", mgta, reg_pfx, id);
+   } else if (file == NV_FILE_MEM_S) {
+      nv_print_address('s', -1, ind, 4 * id);
+   } else if (file == NV_FILE_MEM_P) {
+      nv_print_address('p', -1, ind, 4 * id);
+   } else if (file == NV_FILE_MEM_V) {
+      nv_print_address('v', -1, ind, 4 * id);
+   } else if (file == NV_FILE_IMM) {
+      /* types not listed here print nothing */
+      switch (type) {
+      case NV_TYPE_U16:
+      case NV_TYPE_S16:
+         PRINT(" %s0x%04x", orng, value->reg.imm.u32);
+         break;
+      case NV_TYPE_F32:
+         PRINT(" %s%f", orng, value->reg.imm.f32);
+         break;
+      case NV_TYPE_F64:
+         PRINT(" %s%f", orng, value->reg.imm.f64);
+         break;
+      case NV_TYPE_U32:
+      case NV_TYPE_S32:
+      case NV_TYPE_P32:
+         PRINT(" %s0x%08x", orng, value->reg.imm.u32);
+         break;
+      }
+   } else if (file >= NV_FILE_MEM_G(0) && file <= NV_FILE_MEM_G(15)) {
+      nv_print_address('g', file - NV_FILE_MEM_G(0), ind, id * 4);
+   } else if (file >= NV_FILE_MEM_C(0) && file <= NV_FILE_MEM_C(15)) {
+      nv_print_address('c', file - NV_FILE_MEM_C(0), ind, id * 4);
+   } else {
+      NOUVEAU_ERR(" BAD_FILE[%i]", id);
+   }
+}
+
+/* Print the value behind @ref, using the reference's typecast as type. */
+static INLINE void
+nv_print_ref(struct nv_ref *ref, struct nv_value *ind)
+{
+   struct nv_value *value = ref->value;
+
+   nv_print_value(value, ind, ref->typecast);
+}
+
+/* Print one instruction on a single line:
+ *   serial, optional predicate, opcode (SET shows its set condition,
+ *   saturating ops are prefixed with "sat"), optional flags definition,
+ *   first definition or branch target or '#', the sources 0..3 with their
+ *   modifiers, and finally 'l' or 's' depending on i->is_long.
+ */
+void
+nv_print_instruction(struct nv_instruction *i)
+{
+   int j;
+
+   PRINT("%i: ", i->serial);
+
+   if (i->flags_src)
+      nv_print_cond(i);
+
+   PRINT("%s", gree);
+   if (i->opcode == NV_OP_SET) {
+      PRINT("set %s", nv_cond_name(i->set_cond));
+   } else {
+      const char *name = nv_opcode_name(i->opcode);
+
+      if (i->saturate)
+         PRINT("sat %s", name);
+      else
+         PRINT("%s", name);
+   }
+
+   if (i->flags_def)
+      nv_print_value(i->flags_def, NULL, NV_TYPE_ANY);
+
+   /* Only STORE & STA can write to MEM, and they do not def
+    * anything, so the address is thus part of the source.
+    */
+   if (i->def[0])
+      nv_print_value(i->def[0], NULL, NV_TYPE_ANY);
+   else if (i->target)
+      PRINT(" %s(BB:%i)", orng, i->target->id);
+   else
+      PRINT(" #");
+
+   for (j = 0; j < 4; ++j) {
+      struct nv_ref *src = i->src[j];
+      struct nv_value *ind = NULL;
+
+      if (!src)
+         continue;
+
+      if (src->mod)
+         PRINT(" %s%s", gree, nv_modifier_string(src->mod));
+
+      /* src[4] holds the address used by the indirect operand */
+      if (j == nv50_indirect_opnd(i))
+         ind = i->src[4]->value;
+
+      nv_print_ref(src, ind);
+   }
+   PRINT(" %s%c\n", norm, i->is_long ? 'l' : 's');
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
new file mode 100644 (file)
index 0000000..d401706
--- /dev/null
@@ -0,0 +1,946 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* #define NV50PC_DEBUG */
+
+/* #define NV50_RA_DEBUG_LIVEI */
+/* #define NV50_RA_DEBUG_LIVE_SETS */
+/* #define NV50_RA_DEBUG_JOIN */
+
+#include "nv50_context.h"
+#include "nv50_pc.h"
+
+#include "util/u_simple_list.h"
+
+/* Number of register files tracked by a register_set. */
+#define NUM_REGISTER_FILES 4
+
+/* Occupancy state of the register files during allocation. */
+struct register_set {
+   struct nv_pc *pc; /* program whose max_reg[] is updated on allocation */
+
+   /* per file: upper bound used by reg_assign() when searching for space */
+   uint32_t last[NUM_REGISTER_FILES];
+   /* per file: bitmap of occupied allocation units, 8 * 32 bits */
+   uint32_t bits[NUM_REGISTER_FILES][8];
+};
+
+/* Context shared by the register allocation passes in this file. */
+struct nv_pc_pass {
+   struct nv_pc *pc;
+
+   /* instructions in serial order, filled by pass_order_instructions() */
+   struct nv_instruction **insns;
+   int num_insns; /* number of valid entries in insns */
+
+   uint pass_seq;
+};
+
+/* Merge @range with all directly following ranges that it touches or
+ * overlaps (range->end >= next->bgn), freeing the absorbed list nodes.
+ */
+static void
+ranges_coalesce(struct nv_range *range)
+{
+   struct nv_range *nxt;
+
+   for (nxt = range->next; nxt && range->end >= nxt->bgn; nxt = range->next) {
+      assert(range->bgn <= nxt->bgn);
+
+      if (nxt->end > range->end)
+         range->end = nxt->end;
+
+      /* unlink the absorbed node, then release it */
+      range->next = nxt->next;
+      FREE(nxt);
+   }
+}
+
+/* Add the interval [@bgn, @end) to the sorted live interval list of @val.
+ *
+ * If the interval overlaps or touches an existing range, that range is
+ * grown (and coalesced with its successors) and TRUE is returned; in that
+ * case @new_range is not used. Otherwise a node is inserted at the right
+ * position and FALSE is returned; the node is @new_range if given, else it
+ * is allocated here.
+ */
+static boolean
+add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
+{
+   struct nv_range **link = &val->livei;
+   struct nv_range *cur;
+
+   for (cur = val->livei; cur; cur = cur->next) {
+      boolean grown = FALSE;
+
+      if (end < cur->bgn)
+         break; /* insert before cur */
+
+      if (bgn > cur->end) {
+         link = &cur->next;
+         continue; /* insert somewhere after cur */
+      }
+
+      /* overlap: extend cur on whichever side sticks out */
+      if (bgn < cur->bgn) {
+         cur->bgn = bgn;
+         grown = TRUE;
+      }
+      if (end > cur->end) {
+         cur->end = end;
+         grown = TRUE;
+      }
+      if (grown)
+         ranges_coalesce(cur);
+      return TRUE;
+   }
+
+   if (!new_range)
+      new_range = CALLOC_STRUCT(nv_range);
+
+   new_range->bgn = bgn;
+   new_range->end = end;
+   new_range->next = cur;
+   *link = new_range;
+   return FALSE;
+}
+
+/* Mark @val live inside block @b up to position @end.
+ * The interval starts at the defining instruction if that lies between
+ * b->entry and b->exit, at the block entry otherwise. Values without a
+ * defining instruction are ignored.
+ */
+static void
+add_range(struct nv_value *val, struct nv_basic_block *b, int end)
+{
+   int def_pos, bgn;
+
+   if (!val->insn) /* ignore non-def values */
+      return;
+   assert(b->entry->serial <= b->exit->serial);
+   assert(b->phi->serial <= end);
+   assert(b->exit->serial + 1 >= end);
+
+   def_pos = val->insn->serial;
+   if (def_pos >= b->entry->serial && def_pos <= b->exit->serial)
+      bgn = def_pos;
+   else
+      bgn = b->entry->serial;
+
+   assert(bgn <= end);
+
+   add_range_ex(val, bgn, end, NULL);
+}
+
+#if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI)
+/* Debug helper: print all live ranges of @a as "[bgn, end) " pairs. */
+static void
+livei_print(struct nv_value *a)
+{
+   struct nv_range *r;
+
+   debug_printf("livei %i: ", a->n);
+   for (r = a->livei; r; r = r->next)
+      debug_printf("[%i, %i) ", r->bgn, r->end);
+   debug_printf("\n");
+}
+#endif
+
+/* Move all live ranges of @src into @dst and leave @src without ranges.
+ * Nodes of @src are reused where add_range_ex() inserts them and freed
+ * where their interval got merged into an existing range of @dst.
+ */
+static void
+livei_unify(struct nv_value *dst, struct nv_value *src)
+{
+   struct nv_range *range = src->livei;
+
+   while (range) {
+      struct nv_range *next = range->next;
+      const boolean merged = add_range_ex(dst, range->bgn, range->end, range);
+
+      if (merged)
+         FREE(range);
+      range = next;
+   }
+   src->livei = NULL;
+}
+
+/* Free all live range nodes of @val.
+ * Note that val->livei itself is left unchanged.
+ */
+static void
+livei_release(struct nv_value *val)
+{
+   struct nv_range *range = val->livei;
+
+   while (range) {
+      struct nv_range *next = range->next;
+
+      FREE(range);
+      range = next;
+   }
+}
+
+/* TRUE if any live range of @a intersects any live range of @b.
+ * Ranges are half-open, so ranges that merely touch do not overlap.
+ */
+static boolean
+livei_have_overlap(struct nv_value *a, struct nv_value *b)
+{
+   struct nv_range *ra, *rb;
+
+   for (ra = a->livei; ra; ra = ra->next) {
+      for (rb = b->livei; rb; rb = rb->next) {
+         if (rb->bgn >= ra->end)
+            continue; /* rb starts after ra is over */
+         if (rb->end <= ra->bgn)
+            continue; /* rb is over before ra starts */
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+/* Return the end of the last live range of @a, which must have at least
+ * one range.
+ */
+static int
+livei_end(struct nv_value *a)
+{
+   struct nv_range *last = a->livei;
+
+   assert(last);
+   for (; last->next; last = last->next)
+      ;
+   return last->end;
+}
+
+/* TRUE if position @pos lies inside one of the (sorted, half-open) live
+ * ranges of @a.
+ */
+static boolean
+livei_contains(struct nv_value *a, int pos)
+{
+   struct nv_range *r;
+
+   for (r = a->livei; r; r = r->next) {
+      if (r->bgn > pos)
+         break; /* this and all later ranges start after pos */
+      if (pos < r->end)
+         return TRUE;
+   }
+   return FALSE;
+}
+
+/* Find a free, aligned group of registers in @set for the @n values in
+ * @def and assign consecutive register ids to them.
+ *
+ * The register file and type are taken from def[0]. Only values that have
+ * a live interval receive an id (and consume one). Returns FALSE if no
+ * suitable space was found within set->last[file], TRUE otherwise.
+ */
+static boolean
+reg_assign(struct register_set *set, struct nv_value **def, int n)
+{
+   int i, id, s;
+   uint m;
+   int f = def[0]->reg.file;
+
+   /* s: size of the group in allocation units, m: bit mask of that size */
+   s = n << (nv_type_order(def[0]->reg.type) - 1);
+   m = (1 << s) - 1;
+
+   id = set->last[f];
+
+   /* scan the occupancy bitmap word by word, skipping full words, and
+    * look for a free slot at offsets that are multiples of s
+    */
+   for (i = 0; i * 32 < set->last[f]; ++i) {
+      if (set->bits[f][i] == 0xffffffff)
+         continue;
+
+      for (id = 0; id < 32; id += s)
+         if (!(set->bits[f][i] & (m << id)))
+            break;
+      if (id < 32)
+         break;
+   }
+   if (i * 32 + id > set->last[f])
+      return FALSE;
+
+   /* mark the slot as occupied */
+   set->bits[f][i] |= m << id;
+
+   id += i * 32;
+
+   set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
+
+   /* convert from allocation units back to a register index of this type */
+   id >>= nv_type_order(def[0]->reg.type) - 1;
+
+   for (i = 0; i < n; ++i)
+      if (def[i]->livei)
+         def[i]->reg.id = id++;
+
+   return TRUE;
+}
+
+/* Mark the register held by @val as occupied in @set and raise the
+ * program's max_reg for that file if needed. Values without a register
+ * (negative id) are ignored.
+ */
+static INLINE void
+reg_occupy(struct register_set *set, struct nv_value *val)
+{
+   const int file = val->reg.file;
+   int order, unit;
+   uint mask;
+
+   if (val->reg.id < 0)
+      return;
+
+   order = nv_type_order(val->reg.type) - 1;
+   unit = val->reg.id << order;         /* first allocation unit */
+   mask = (1 << (1 << order)) - 1;      /* one bit per unit covered */
+
+   set->bits[file][unit / 32] |= mask << (unit % 32);
+
+   if (unit > set->pc->max_reg[file])
+      set->pc->max_reg[file] = unit;
+}
+
+/* Clear the occupancy bits of the register held by @val in @set.
+ * Values without a register (negative id) are ignored.
+ */
+static INLINE void
+reg_release(struct register_set *set, struct nv_value *val)
+{
+   const int file = val->reg.file;
+   int order, unit;
+   uint mask;
+
+   if (val->reg.id < 0)
+      return;
+
+   order = nv_type_order(val->reg.type) - 1;
+   unit = val->reg.id << order;         /* first allocation unit */
+   mask = (1 << (1 << order)) - 1;      /* one bit per unit covered */
+
+   set->bits[file][unit / 32] &= ~(mask << (unit % 32));
+}
+
+/* Decide whether values @a and @b may share one register.
+ *
+ * They must be in the same register file and have the same type size.
+ * If their join representatives already have equal register ids (which
+ * includes both being unassigned) the join is allowed. If both are
+ * assigned to different registers it is not. If exactly one is assigned,
+ * the join is allowed only if the unassigned value's live interval does
+ * not overlap any other value group already placed in that register.
+ */
+static INLINE boolean
+join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   int i;
+   struct nv_value *val;
+
+   if (a->reg.file != b->reg.file ||
+       nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
+      return FALSE;
+
+   if (a->join->reg.id == b->join->reg.id)
+      return TRUE;
+
+#if 1
+   /* the ids differ here, so at least one of a, b has been assigned */
+
+   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
+      return FALSE; /* both assigned, to different registers */
+
+   if (b->join->reg.id >= 0) {
+      /* make a the assigned and b the unassigned value */
+      val = a;
+      a = b;
+      b = val;
+   }
+
+   /* b must not be live together with anything else living in a's register */
+   for (i = 0; i < ctx->pc->num_values; ++i) {
+      val = &ctx->pc->values[i];
+
+      if (val->join->reg.id != a->join->reg.id)
+         continue;
+      if (val->join != a->join && livei_have_overlap(val->join, b->join))
+         return FALSE;
+   }
+   return TRUE;
+#endif
+   return FALSE;
+}
+
+/* Unconditionally join @b into @a: the representative of @a inherits the
+ * register id of @b's representative (if that one is assigned) and its
+ * live ranges, and every value that pointed to @b's representative is
+ * redirected to @a's.
+ */
+static INLINE void
+do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   struct nv_value *old_rep = b->join;
+   struct nv_value *new_rep = a->join;
+   int n;
+
+   if (old_rep->reg.id >= 0)
+      new_rep->reg.id = old_rep->reg.id;
+
+   livei_unify(new_rep, old_rep);
+
+#ifdef NV50_RA_DEBUG_JOIN
+   debug_printf("joining %i to %i\n", b->n, a->n);
+#endif
+
+   /* make a->join the new representative */
+   for (n = 0; n < ctx->pc->num_values; ++n) {
+      struct nv_value *v = &ctx->pc->values[n];
+
+      if (v->join == old_rep)
+         v->join = new_rep;
+   }
+
+   assert(b->join == a->join);
+}
+
+/* Join @b into @a if join_allowed() permits it and the live intervals of
+ * their representatives do not overlap; otherwise do nothing (apart from
+ * debug output).
+ */
+static INLINE void
+try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+   if (!join_allowed(ctx, a, b)) {
+#ifdef NV50_RA_DEBUG_JOIN
+      debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
+#endif
+   } else if (livei_have_overlap(a->join, b->join)) {
+#ifdef NV50_RA_DEBUG_JOIN
+      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
+      livei_print(a);
+      livei_print(b);
+#endif
+   } else {
+      do_join_values(ctx, a, b);
+   }
+}
+
+/* TRUE if @b has more than one predecessor and @p has two outgoing edges
+ * that are not loop edges.
+ */
+static INLINE boolean
+need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
+{
+   int k, forward = 0;
+
+   for (k = 0; k < 2; ++k) {
+      if (!p->out[k])
+         continue;
+      if (IS_LOOP_EDGE(p->out_kind[k]))
+         continue;
+      ++forward;
+   }
+
+   return (forward == 2) && (b->num_in > 1);
+}
+
+/* Select the source of @phi that applies when control comes from block @b
+ * (with @tb passed on as third argument to nvbb_reachable_by).
+ *
+ * Among the sources whose defining block satisfies
+ * nvbb_reachable_by(b, def_bb, tb), a later candidate replaces the current
+ * choice unless nvbb_reachable_by(chosen_def_bb, candidate_def_bb, tb)
+ * holds. Returns the source index, or -1 if no source qualifies.
+ */
+static int
+phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
+                struct nv_basic_block *tb)
+{
+   int best = -1;
+   int s;
+
+   for (s = 0; s < 4; ++s) {
+      if (!phi->src[s])
+         break;
+
+      if (!nvbb_reachable_by(b, phi->src[s]->value->insn->bb, tb))
+         continue;
+
+      /* NOTE: back-edges are ignored by the reachable-by check */
+      if (best >= 0 &&
+          nvbb_reachable_by(phi->src[best]->value->insn->bb,
+                            phi->src[s]->value->insn->bb, tb))
+         continue;
+
+      best = s;
+   }
+   return best;
+}
+
+/* For each operand of each PHI in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with its
+ * result. This eliminates liveness conflicts and enables us to let values be
+ * copied to the right register if such a conflict exists nonetheless.
+ *
+ * These MOVs are also crucial in making sure the live intervals of phi srces
+ * are extended until the end of the loop, since they are not included in the
+ * live-in sets.
+ *
+ * Where need_new_else_block() says so, a new block is inserted on the edge
+ * from the predecessor to @b to hold the MOVs. The function recurses into
+ * the successors not yet visited in this pass and always returns 0.
+ */
+static int
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i, *ni;
+   struct nv_value *val;
+   struct nv_basic_block *p, *pn;
+   int n, j;
+
+   /* mark as visited for this pass */
+   b->pass_seq = ctx->pc->pass_seq;
+
+   for (n = 0; n < b->num_in; ++n) {
+      /* p: original predecessor, pn: block that receives the MOVs */
+      p = pn = b->in[n];
+      assert(p);
+
+      if (need_new_else_block(b, p)) {
+         /* splice a fresh block pn into the edge p -> b */
+         pn = new_basic_block(ctx->pc);
+
+         if (p->out[0] == b)
+            p->out[0] = pn;
+         else
+            p->out[1] = pn;
+
+         if (p->exit->target == b) /* target to new else-block */
+            p->exit->target = pn;
+
+         b->in[n] = pn;
+
+         pn->out[0] = b;
+         pn->in[0] = p;
+         pn->num_in = 1;
+      }
+      /* new instructions below are created in pn */
+      ctx->pc->current_block = pn;
+
+      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+         if ((j = phi_opnd_for_bb(i, p, b)) < 0)
+            continue;
+         val = i->src[j]->value;
+
+         /* flags is set below once an operand was replaced by a MOV result:
+          * in that case use the MOV's original source and put the new
+          * operand into the first free source slot instead
+          */
+         if (i->src[j]->flags) {
+            val = val->insn->src[0]->value;
+            while (j < 4 && i->src[j])
+               ++j;
+            assert(j < 4);
+         }
+
+         ni = new_instruction(ctx->pc, NV_OP_MOV);
+
+         /* TODO: insert instruction at correct position in the first place */
+         if (ni->prev && ni->prev->target)
+            nv_nvi_permute(ni->prev, ni);
+
+         ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
+         ni->def[0]->insn = ni;
+         ni->src[0] = new_ref(ctx->pc, val);
+
+         nv_reference(ctx->pc, &i->src[j], ni->def[0]);
+
+         i->src[j]->flags = 1;
+      }
+
+      /* NOTE(review): for a newly inserted, non-empty block a BRA to b is
+       * appended to the *other* predecessor of b (b->in[n ? 0 : 1]);
+       * presumably so that it jumps over the new block -- confirm.
+       */
+      if (pn != p && pn->exit) {
+         ctx->pc->current_block = b->in[n ? 0 : 1];
+         ni = new_instruction(ctx->pc, NV_OP_BRA);
+         ni->target = b;
+         ni->is_terminator = 1;
+      }
+   }
+
+   /* continue with successors that were not visited in this pass yet */
+   for (j = 0; j < 2; ++j)
+      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
+         pass_generate_phi_movs(ctx, b->out[j]);
+
+   return 0;
+}
+
+/* Coalesce values over all ordered instructions. @iter selects which
+ * kinds of joins are performed by this call:
+ *   iter == 0: texture ops (TEX/TXB/TXL/TXQ): def[c] is joined with src[c]
+ *   iter == 1: SELECT: all sources are joined with the result
+ *   iter == 2: PHI and MOV: joins are attempted and may be refused
+ * Always returns 0.
+ */
+static int
+pass_join_values(struct nv_pc_pass *ctx, int iter)
+{
+   int c, n;
+
+   for (n = 0; n < ctx->num_insns; ++n) {
+      struct nv_instruction *i = ctx->insns[n];
+
+      switch (i->opcode) {
+      case NV_OP_PHI:
+         if (iter != 2)
+            break;
+         for (c = 0; c < 4 && i->src[c]; ++c)
+            try_join_values(ctx, i->def[0], i->src[c]->value);
+         break;
+      case NV_OP_MOV:
+         /* only for sources that have a defining instruction and whose
+          * join representative is not defined by a vector op
+          * NOTE(review): join->insn is dereferenced unchecked; presumably
+          * the representative of a defined value always has an insn
+          */
+         if ((iter == 2) && i->src[0]->value->insn &&
+             !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
+            try_join_values(ctx, i->def[0], i->src[0]->value);
+         break;
+      case NV_OP_SELECT:
+         if (iter != 1)
+            break;
+         /* these joins are mandatory, hence the assert instead of a check */
+         for (c = 0; c < 4 && i->src[c]; ++c) {
+            assert(join_allowed(ctx, i->def[0], i->src[c]->value));
+            do_join_values(ctx, i->def[0], i->src[c]->value);
+         }
+         break;
+      case NV_OP_TEX:
+      case NV_OP_TXB:
+      case NV_OP_TXL:
+      case NV_OP_TXQ:
+         if (iter)
+            break;
+         /* component c of the result is joined with source c */
+         for (c = 0; c < 4; ++c) {
+            if (!i->src[c])
+               break;
+            do_join_values(ctx, i->def[c], i->src[c]->value);
+         }
+         break;
+      default:
+         break;
+      }
+   }
+   return 0;
+}
+
+/* Order the instructions so that live intervals can be expressed in numbers.
+ *
+ * Appends all instructions of @b, starting at b->phi, to ctx->insns and
+ * gives each one its index in that array as serial number. @priv is the
+ * struct nv_pc_pass context.
+ */
+static void
+pass_order_instructions(void *priv, struct nv_basic_block *b)
+{
+   struct nv_pc_pass *ctx = priv;
+   struct nv_instruction *insn = b->phi;
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   assert(!b->exit || !b->exit->next);
+
+   while (insn) {
+      insn->serial = ctx->num_insns;
+      ctx->insns[ctx->num_insns] = insn;
+      ctx->num_insns++;
+      insn = insn->next;
+   }
+}
+
+/* Debug helper: print the numbers of all values in the live set of @b that
+ * have a defining instruction. Does nothing unless NV50_RA_DEBUG_LIVE_SETS
+ * is defined.
+ */
+static void
+bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
+{
+#ifdef NV50_RA_DEBUG_LIVE_SETS
+   int n;
+
+   debug_printf("LIVE-INs of BB:%i: ", b->id);
+
+   for (n = 0; n < pc->num_values; ++n) {
+      struct nv_value *v = &pc->values[n];
+
+      if ((b->live_set[n / 32] & (1 << (n % 32))) && v->insn)
+         debug_printf("%i ", v->n);
+   }
+   debug_printf("\n");
+#endif
+}
+
+/* Add @val to the live set of @b. Values without a defining instruction
+ * are never added.
+ */
+static INLINE void
+live_set_add(struct nv_basic_block *b, struct nv_value *val)
+{
+   if (!val->insn) /* don't add non-def values */
+      return;
+   /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+   b->live_set[val->n / 32] |= 1u << (val->n % 32);
+}
+
+/* Remove @val from the live set of @b. */
+static INLINE void
+live_set_rem(struct nv_basic_block *b, struct nv_value *val)
+{
+   /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+   b->live_set[val->n / 32] &= ~(1u << (val->n % 32));
+}
+
+/* TRUE if the value referenced by @ref is in the live set of @b.
+ *
+ * The result is normalized to TRUE/FALSE instead of returning the raw
+ * masked word: if boolean is narrower than 32 bits, a set high bit would
+ * otherwise be truncated to 0 on return.
+ */
+static INLINE boolean
+live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
+{
+   int n = ref->value->n;
+
+   return (b->live_set[n / 32] & (1u << (n % 32))) ? TRUE : FALSE;
+}
+
+/* The live set of a block contains those values that are live immediately
+ * before the beginning of the block, so do a backwards scan.
+ *
+ * The successors are processed first (depth first, each block once per
+ * pass); their live sets are merged into the set of @b, which is then
+ * updated by walking the instructions of @b from exit to entry.
+ * Returns 0, or the non-zero result of a recursive call.
+ */
+static int
+pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i;
+   int j, n, ret = 0;
+
+   /* already visited in this pass */
+   if (b->pass_seq >= ctx->pc->pass_seq)
+      return 0;
+   b->pass_seq = ctx->pc->pass_seq;
+
+   /* slight hack for undecidedness: set phi = entry if it's undefined */
+   if (!b->phi)
+      b->phi = b->entry;
+
+   for (n = 0; n < 2; ++n) {
+      if (!b->out[n] || b->out[n] == b)
+         continue;
+      ret = pass_build_live_sets(ctx, b->out[n]);
+      if (ret)
+         return ret;
+
+      /* live-out of b: copy the set of out[0], OR in the set of out[1] */
+      if (n == 0) {
+         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+            b->live_set[j] = b->out[n]->live_set[j];
+      } else {
+         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+            b->live_set[j] |= b->out[n]->live_set[j];
+      }
+   }
+
+   if (!b->entry)
+      return 0;
+
+   bb_live_set_print(ctx->pc, b);
+
+   /* walk backwards: a definition ends liveness, a use starts it */
+   for (i = b->exit; i != b->entry->prev; i = i->prev) {
+      for (j = 0; j < 4; j++) {
+         if (!i->def[j])
+            break;
+         live_set_rem(b, i->def[j]);
+      }
+      for (j = 0; j < 4; j++) {
+         if (!i->src[j])
+            break;
+         live_set_add(b, i->src[j]->value);
+      }
+      if (i->src[4])
+         live_set_add(b, i->src[4]->value);
+      if (i->flags_def)
+         live_set_rem(b, i->flags_def);
+      if (i->flags_src)
+         live_set_add(b, i->flags_src->value);
+   }
+   /* results of the block's phi functions are not live-in */
+   for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
+      live_set_rem(b, i->def[0]);
+
+   bb_live_set_print(ctx->pc, b);
+
+   return 0;
+}
+
+/* Initialize b's live set (n 32-bit words) from its successors:
+ * the union of both successors' sets if both exist, a copy of the single
+ * existing successor's set if only one exists, all zero otherwise.
+ */
+static void collect_live_values(struct nv_basic_block *b, const int n)
+{
+   struct nv_basic_block *const succ0 = b->out[0];
+   struct nv_basic_block *const succ1 = b->out[1];
+   int w;
+
+   if (succ0 && succ1) { /* what to do about back-edges ? */
+      for (w = 0; w < n; ++w)
+         b->live_set[w] = succ0->live_set[w] | succ1->live_set[w];
+      return;
+   }
+
+   if (succ0 || succ1) {
+      memcpy(b->live_set, (succ0 ? succ0 : succ1)->live_set,
+             n * sizeof(uint32_t));
+      return;
+   }
+
+   memset(b->live_set, 0, n * sizeof(uint32_t));
+}
+
+/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
+static int
+pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *i, *i_stop;
+   int j, s;
+   const int n = (ctx->pc->num_values + 31) / 32;
+
+   /* verify that first block does not have live-in values */
+   if (b->num_in == 0)
+      for (j = 0; j < n; ++j)
+         assert(b->live_set[j] == 0);
+
+   collect_live_values(b, n);
+
+   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
+   for (j = 0; j < 2; ++j) {
+      if (!b->out[j] || !b->out[j]->phi)
+         continue;
+      for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
+         live_set_rem(b, i->def[0]);
+
+         for (s = 0; s < 4; ++s) {
+            if (!i->src[s])
+               break;
+            assert(i->src[s]->value->insn);
+            if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
+               live_set_add(b, i->src[s]->value);
+            else
+               live_set_rem(b, i->src[s]->value);
+         }
+      }
+   }
+
+   /* remaining live-outs are live until the end */
+   if (b->exit) {
+      for (j = 0; j < ctx->pc->num_values; ++j) {
+         if (!(b->live_set[j / 32] & (1 << (j % 32))))
+            continue;
+         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
+#ifdef NV50_RA_DEBUG_LIVEI
+         debug_printf("adding range for live value %i: ", j);
+         livei_print(&ctx->pc->values[j]);
+#endif
+
+      }
+   }
+
+   i_stop = b->entry ? b->entry->prev : NULL;
+
+   /* don't have to include phi functions here (will have 0 live range) */
+   for (i = b->exit; i != i_stop; i = i->prev) {
+      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
+      for (j = 0; j < 4; ++j) {
+         if (i->def[j])
+            live_set_rem(b, i->def[j]);
+      }
+      if (i->flags_def)
+         live_set_rem(b, i->flags_def);
+
+      for (j = 0; j < 5; ++j) {
+         if (i->src[j] && !live_set_test(b, i->src[j])) {
+            live_set_add(b, i->src[j]->value);
+            add_range(i->src[j]->value, b, i->serial);
+#ifdef NV50_RA_DEBUG_LIVEI
+            debug_printf("adding range for source %i (ends living): ",
+                         i->src[j]->value->n);
+            livei_print(i->src[j]->value);
+#endif
+         }
+      }
+      if (i->flags_src && !live_set_test(b, i->flags_src)) {
+         live_set_add(b, i->flags_src->value);
+         add_range(i->flags_src->value, b, i->serial);
+#ifdef NV50_RA_DEBUG_LIVEI
+         debug_printf("adding range for source %i (ends living): ",
+                      i->flags_src->value->n);
+         livei_print(i->flags_src->value);
+#endif
+      }
+   }
+
+   b->pass_seq = ctx->pc->pass_seq;
+
+   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
+      pass_build_intervals(ctx, b->out[0]);
+
+   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
+      pass_build_intervals(ctx, b->out[1]);
+
+   return 0;
+}
+
+/* Reset *set to all zero, attach it to pc and fill in the per-file
+ * last[] limits for the GPR, OUT, FLAGS and ADDR register files.
+ */
+static INLINE void
+nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
+{
+   memset(set, 0, sizeof *set);
+   set->pc = pc;
+
+   set->last[NV_FILE_GPR]   = 255;
+   set->last[NV_FILE_OUT]   = 127;
+   set->last[NV_FILE_FLAGS] = 4;
+   set->last[NV_FILE_ADDR]  = 4;
+}
+
+/* Insert nval into the circular doubly linked list headed by list, keeping
+ * it sorted by livei->bgn.  The search runs from the tail towards the head,
+ * and nval is placed after the last element that does not begin later
+ * than it.
+ */
+static void
+insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+{
+   struct nv_value *pos = list->prev;
+
+   /* skip backwards over everything that begins after nval */
+   while (pos != list && pos->livei->bgn > nval->livei->bgn)
+      pos = pos->prev;
+
+   /* pos is now the list head or begins no later than nval: link after it */
+   nval->prev = pos;
+   nval->next = pos->next;
+   pos->next->prev = nval;
+   pos->next = nval;
+}
+
+/* Assign registers to values by scanning them in order of the start of
+ * their live intervals.
+ *
+ * Values are kept on four lists: unhandled (not processed yet, sorted by
+ * interval begin), active, inactive and handled.  The set 'free' tracks
+ * register occupation according to the active list; 'f' is a per-value
+ * copy that additionally has the registers of overlapping inactive and
+ * pre-assigned unhandled values marked occupied.
+ *
+ * iter: when zero, only values defined by vector ops get registers
+ *       assigned here; when non-zero, all other values do too.
+ *
+ * Returns 0; aborts the process if a register cannot be assigned.
+ */
+static int
+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+{
+   struct nv_instruction *i;
+   struct register_set f, free;
+   int k, n;
+   struct nv_value *cur, *val, *tmp[2];
+   struct nv_value active, inactive, handled, unhandled;
+
+   make_empty_list(&active);
+   make_empty_list(&inactive);
+   make_empty_list(&handled);
+   make_empty_list(&unhandled);
+
+   nv50_ctor_register_set(ctx->pc, &free);
+
+   /* joined values should have range = NULL and thus not be added;
+    * also, fixed memory values won't be added because they're not
+    * def'd, just used
+    */
+   for (n = 0; n < ctx->num_insns; ++n) {
+      i = ctx->insns[n];
+
+      for (k = 0; k < 4; ++k) {
+         if (i->def[k] && i->def[k]->livei)
+            insert_ordered_tail(&unhandled, i->def[k]);
+         else
+         if (0 && i->def[k]) /* debug output, compiled out */
+            debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+      }
+      if (i->flags_def && i->flags_def->livei)
+         insert_ordered_tail(&unhandled, i->flags_def);
+   }
+
+   /* sanity check: list is sorted and holds no joined values */
+   for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+      assert(val->join == val);
+      assert(val->livei->bgn <= val->next->livei->bgn);
+   }
+
+   foreach_s(cur, tmp[0], &unhandled) {
+      remove_from_list(cur);
+
+      /* retire active values that ended before cur begins, and park
+       * those that do not cover cur's begin on the inactive list
+       */
+      foreach_s(val, tmp[1], &active) {
+         if (livei_end(val) <= cur->livei->bgn) {
+            reg_release(&free, val);
+            move_to_head(&handled, val);
+         } else
+         if (!livei_contains(val, cur->livei->bgn)) {
+            reg_release(&free, val);
+            move_to_head(&inactive, val);
+         }
+      }
+
+      /* retire ended inactive values, reactivate those covering cur's begin */
+      foreach_s(val, tmp[1], &inactive) {
+         if (livei_end(val) <= cur->livei->bgn)
+            move_to_head(&handled, val);
+         else
+         if (livei_contains(val, cur->livei->bgn)) {
+            reg_occupy(&free, val);
+            move_to_head(&active, val);
+         }
+      }
+
+      /* work on a copy so the extra occupations only affect cur */
+      f = free;
+
+      foreach(val, &inactive)
+         if (livei_have_overlap(val, cur))
+            reg_occupy(&f, val);
+
+      foreach(val, &unhandled)
+         if (val->reg.id >= 0 && livei_have_overlap(val, cur))
+            reg_occupy(&f, val);
+
+      /* cur has no register yet */
+      if (cur->reg.id < 0) {
+         boolean mem = FALSE;
+
+         if (nv_is_vector_op(cur->insn->opcode))
+            mem = !reg_assign(&f, &cur->insn->def[0], 4);
+         else
+         if (iter)
+            mem = !reg_assign(&f, &cur, 1);
+
+         if (mem) {
+            NOUVEAU_ERR("out of registers\n");
+            abort();
+         }
+      }
+      insert_at_head(&active, cur);
+      reg_occupy(&free, cur);
+   }
+
+   return 0;
+}
+
+int
+nv_pc_exec_pass1(struct nv_pc *pc)
+{
+   struct nv_pc_pass *ctx;
+   int i, ret;
+
+   NV50_DBGMSG("REGISTER ALLOCATION - entering\n");
+
+   ctx = CALLOC_STRUCT(nv_pc_pass);
+   if (!ctx)
+      return -1;
+   ctx->pc = pc;
+
+   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
+
+   pc->pass_seq++;
+   ret = pass_generate_phi_movs(ctx, pc->root);
+   assert(!ret);
+
+   for (i = 0; i < pc->loop_nesting_bound; ++i) {
+      pc->pass_seq++;
+      ret = pass_build_live_sets(ctx, pc->root);
+      assert(!ret && "live sets");
+      if (ret) {
+         NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
+         goto out;
+      }
+   }
+
+   pc->pass_seq++;
+   nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx);
+
+   pc->pass_seq++;
+   ret = pass_build_intervals(ctx, pc->root);
+   assert(!ret && "build intervals");
+   if (ret) {
+      NOUVEAU_ERR("failed to build live intervals\n");
+      goto out;
+   }
+
+#ifdef NV50_RA_DEBUG_LIVEI
+   for (i = 0; i < pc->num_values; ++i)
+      livei_print(&pc->values[i]);
+#endif
+
+   ret = pass_join_values(ctx, 0);
+   if (ret)
+      goto out;
+   ret = pass_linear_scan(ctx, 0);
+   if (ret)
+      goto out;
+   ret = pass_join_values(ctx, 1);
+   if (ret)
+      goto out;
+   ret = pass_join_values(ctx, 2);
+   if (ret)
+      goto out;
+   ret = pass_linear_scan(ctx, 1);
+   if (ret)
+      goto out;
+
+   for (i = 0; i < pc->num_values; ++i)
+      livei_release(&pc->values[i]);
+
+   NV50_DBGMSG("REGISTER ALLOCATION - leaving\n");
+
+out:
+   FREE(ctx);
+   return ret;
+}
index cec2290481f03e66be3de3664a94162dd71afad3..523603ca3af3c730e6d0aa57c8e42989d88b5d59 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * SOFTWARE.
  */
 
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-#include "util/u_inlines.h"
+/* #define NV50_PROGRAM_DEBUG */
 
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-
-#include "nv50_context.h"
-#include "nv50_transfer.h"
-
-#define NV50_SU_MAX_TEMP 127
-#define NV50_SU_MAX_ADDR 4
-//#define NV50_PROGRAM_DUMP
-
-/* $a5 and $a6 always seem to be 0, and using $a7 gives you noise */
-
-/* ARL - gallium craps itself on progs/vp/arl.txt
- *
- * MSB - Like MAD, but MUL+SUB
- *     - Fuck it off, introduce a way to negate args for ops that
- *       support it.
- *
- * Look into inlining IMMD for ops other than MOV (make it general?)
- *     - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
- *       but can emit to P_TEMP first - then MOV later. NVIDIA does this
- *
- * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
- * case, if the emit_src() causes the inst to suddenly become long.
- *
- * Verify half-insns work where expected - and force disable them where they
- * don't work - MUL has it forcibly disabled atm as it fixes POW..
- *
- * FUCK! watch dst==src vectors, can overwrite components that are needed.
- *     ie. SUB R0, R0.yzxw, R0
- *
- * Things to check with renouveau:
- *     FP attr/result assignment - how?
- *             attrib
- *                     - 0x16bc maps vp output onto fp hpos
- *                     - 0x16c0 maps vp output onto fp col0
- *             result
- *                     - colr always 0-3
- *                     - depr always 4
- * 0x16bc->0x16e8 --> some binding between vp/fp regs
- * 0x16b8 --> VP output count
- *
- * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
- *           "MOV rcol.x, fcol.y" = 0x00000004
- * 0x19a8 --> as above but 0x00000100 and 0x00000000
- *     - 0x00100000 used when KIL used
- * 0x196c --> as above but 0x00000011 and 0x00000000
- *
- * 0x1988 --> 0xXXNNNNNN
- *     - XX == FP high something
- */
-struct nv50_reg {
-       enum {
-               P_TEMP,
-               P_ATTR,
-               P_RESULT,
-               P_CONST,
-               P_IMMD,
-               P_ADDR
-       } type;
-       int index;
-
-       int hw;
-       int mod;
-
-       int rhw; /* result hw for FP outputs, or interpolant index */
-       int acc; /* instruction where this reg is last read (first insn == 1) */
-
-       int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */
-       int indirect[2]; /* index into pc->addr, or -1 */
-
-       ubyte buf_index; /* c{0 .. 15}[] or g{0 .. 15}[] */
-};
-
-#define NV50_MOD_NEG 1
-#define NV50_MOD_ABS 2
-#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS)
-#define NV50_MOD_SAT 4
-#define NV50_MOD_I32 8
-
-/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */
-
-/* STACK: Conditionals and loops have to use the (per warp) stack.
- * Stack entries consist of an entry type (divergent path, join at),
- * a mask indicating the active threads of the warp, and an address.
- * MPs can store 12 stack entries internally, if we need more (and
- * we probably do), we have to create a stack buffer in VRAM.
- */
-/* impose low limits for now */
-#define NV50_MAX_COND_NESTING 4
-#define NV50_MAX_LOOP_NESTING 3
-
-#define JOIN_ON(e) e; pc->p->exec_tail->inst[1] |= 2
-
-struct nv50_pc {
-       struct nv50_program *p;
-
-       /* hw resources */
-       struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
-       struct nv50_reg r_addr[NV50_SU_MAX_ADDR];
-
-       /* tgsi resources */
-       struct nv50_reg *temp;
-       int temp_nr;
-       struct nv50_reg *attr;
-       int attr_nr;
-       struct nv50_reg *result;
-       int result_nr;
-       struct nv50_reg *param;
-       int param_nr;
-       struct nv50_reg *immd;
-       uint32_t *immd_buf;
-       int immd_nr;
-       struct nv50_reg **addr;
-       int addr_nr;
-       struct nv50_reg *sysval;
-       int sysval_nr;
-
-       struct nv50_reg *temp_temp[16];
-       struct nv50_program_exec *temp_temp_exec[16];
-       unsigned temp_temp_nr;
-
-       /* broadcast and destination replacement regs */
-       struct nv50_reg *r_brdc;
-       struct nv50_reg *r_dst[4];
-
-       struct nv50_reg reg_instances[16];
-       unsigned reg_instance_nr;
-
-       unsigned interp_mode[32];
-       /* perspective interpolation registers */
-       struct nv50_reg *iv_p;
-       struct nv50_reg *iv_c;
-
-       struct nv50_program_exec *if_insn[NV50_MAX_COND_NESTING];
-       struct nv50_program_exec *if_join[NV50_MAX_COND_NESTING];
-       struct nv50_program_exec *loop_brka[NV50_MAX_LOOP_NESTING];
-       int if_lvl, loop_lvl;
-       unsigned loop_pos[NV50_MAX_LOOP_NESTING];
-
-       unsigned *insn_pos; /* actual program offset of each TGSI insn */
-       boolean in_subroutine;
-
-       /* current instruction and total number of insns */
-       unsigned insn_cur;
-       unsigned insn_nr;
-
-       boolean allow32;
-
-       uint8_t edgeflag_out;
-};
-
-static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *);
-
-static INLINE void
-ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
-{
-       reg->type = type;
-       reg->index = index;
-       reg->hw = hw;
-       reg->mod = 0;
-       reg->rhw = -1;
-       reg->vtx = -1;
-       reg->acc = 0;
-       reg->indirect[0] = reg->indirect[1] = -1;
-       reg->buf_index = (type == P_CONST) ? 1 : 0;
-}
-
-static INLINE unsigned
-popcnt4(uint32_t val)
-{
-       static const unsigned cnt[16]
-       = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
-       return cnt[val & 0xf];
-}
-
-static void
-terminate_mbb(struct nv50_pc *pc)
-{
-       int i;
-
-       /* remove records of temporary address register values */
-       for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
-               if (pc->r_addr[i].index < 0)
-                       pc->r_addr[i].acc = 0;
-}
-
-static void
-alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
-{
-       int i = 0;
-
-       if (reg->type == P_RESULT) {
-               if (pc->p->cfg.high_result < (reg->hw + 1))
-                       pc->p->cfg.high_result = reg->hw + 1;
-       }
-
-       if (reg->type != P_TEMP)
-               return;
-
-       if (reg->hw >= 0) {
-               /*XXX: do this here too to catch FP temp-as-attr usage..
-                *     not clean, but works */
-               if (pc->p->cfg.high_temp < (reg->hw + 1))
-                       pc->p->cfg.high_temp = reg->hw + 1;
-               return;
-       }
-
-       if (reg->rhw != -1) {
-               /* try to allocate temporary with index rhw first */
-               if (!(pc->r_temp[reg->rhw])) {
-                       pc->r_temp[reg->rhw] = reg;
-                       reg->hw = reg->rhw;
-                       if (pc->p->cfg.high_temp < (reg->rhw + 1))
-                               pc->p->cfg.high_temp = reg->rhw + 1;
-                       return;
-               }
-               /* make sure we don't get things like $r0 needs to go
-                * in $r1 and $r1 in $r0
-                */
-               i = pc->result_nr * 4;
-       }
-
-       for (; i < NV50_SU_MAX_TEMP; i++) {
-               if (!(pc->r_temp[i])) {
-                       pc->r_temp[i] = reg;
-                       reg->hw = i;
-                       if (pc->p->cfg.high_temp < (i + 1))
-                               pc->p->cfg.high_temp = i + 1;
-                       return;
-               }
-       }
-
-       NOUVEAU_ERR("out of registers\n");
-       abort();
-}
-
-static INLINE struct nv50_reg *
-reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
-{
-       struct nv50_reg *ri;
-
-       assert(pc->reg_instance_nr < 16);
-       ri = &pc->reg_instances[pc->reg_instance_nr++];
-       if (reg) {
-               alloc_reg(pc, reg);
-               *ri = *reg;
-               reg->indirect[0] = reg->indirect[1] = -1;
-               reg->mod = 0;
-       }
-       return ri;
-}
-
-/* XXX: For shaders that aren't executed linearly (e.g. shaders that
- * contain loops), we need to assign all hw regs to TGSI TEMPs early,
- * lest we risk temp_temps overwriting regs alloc'd "later".
- */
-static struct nv50_reg *
-alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
-{
-       struct nv50_reg *r;
-       int i;
-
-       if (dst && dst->type == P_TEMP && dst->hw == -1)
-               return dst;
-
-       for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
-               if (!pc->r_temp[i]) {
-                       r = MALLOC_STRUCT(nv50_reg);
-                       ctor_reg(r, P_TEMP, -1, i);
-                       pc->r_temp[i] = r;
-                       return r;
-               }
-       }
-
-       NOUVEAU_ERR("out of registers\n");
-       abort();
-       return NULL;
-}
-
-#if 0
-/* release the hardware resource held by r */
-static void
-release_hw(struct nv50_pc *pc, struct nv50_reg *r)
-{
-       assert(r->type == P_TEMP);
-       if (r->hw == -1)
-               return;
-
-       assert(pc->r_temp[r->hw] == r);
-       pc->r_temp[r->hw] = NULL;
-
-       r->acc = 0;
-       if (r->index == -1)
-               FREE(r);
-}
-#endif
-
-static void
-free_temp(struct nv50_pc *pc, struct nv50_reg *r)
-{
-       if (r->index == -1) {
-               unsigned hw = r->hw;
-
-               FREE(pc->r_temp[hw]);
-               pc->r_temp[hw] = NULL;
-       }
-}
-
-static int
-alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
-{
-       int i;
-
-       if ((idx + 4) >= NV50_SU_MAX_TEMP)
-               return 1;
-
-       if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
-           pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
-               return alloc_temp4(pc, dst, idx + 4);
-
-       for (i = 0; i < 4; i++) {
-               dst[i] = MALLOC_STRUCT(nv50_reg);
-               ctor_reg(dst[i], P_TEMP, -1, idx + i);
-               pc->r_temp[idx + i] = dst[i];
-       }
-
-       return 0;
-}
-
-static void
-free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
-{
-       int i;
-
-       for (i = 0; i < 4; i++)
-               free_temp(pc, reg[i]);
-}
-
-static struct nv50_reg *
-temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
-       if (pc->temp_temp_nr >= 16)
-               assert(0);
-
-       pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
-       pc->temp_temp_exec[pc->temp_temp_nr] = e;
-       return pc->temp_temp[pc->temp_temp_nr++];
-}
-
-/* This *must* be called for all nv50_program_exec that have been
- * given as argument to temp_temp, or the temps will be leaked !
- */
-static void
-kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
-       int i;
-
-       for (i = 0; i < pc->temp_temp_nr; i++)
-               if (pc->temp_temp_exec[i] == e)
-                       free_temp(pc, pc->temp_temp[i]);
-       if (!e)
-               pc->temp_temp_nr = 0;
-}
-
-static int
-ctor_immd_4u32(struct nv50_pc *pc,
-              uint32_t x, uint32_t y, uint32_t z, uint32_t w)
-{
-       unsigned size = pc->immd_nr * 4 * sizeof(uint32_t);
-
-       pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t));
-
-       pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
-       pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
-       pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
-       pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-
-       return pc->immd_nr++;
-}
-
-static INLINE int
-ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w)
-{
-       return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w));
-}
-
-static struct nv50_reg *
-alloc_immd(struct nv50_pc *pc, float f)
-{
-       struct nv50_reg *r = MALLOC_STRUCT(nv50_reg);
-       unsigned hw;
-
-       for (hw = 0; hw < pc->immd_nr * 4; hw++)
-               if (pc->immd_buf[hw] == fui(f))
-                       break;
-
-       if (hw == pc->immd_nr * 4)
-               hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4;
-
-       ctor_reg(r, P_IMMD, -1, hw);
-       return r;
-}
-
-static struct nv50_program_exec *
-exec(struct nv50_pc *pc)
-{
-       struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
-
-       e->param.index = -1;
-       return e;
-}
-
-static void
-emit(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
-       struct nv50_program *p = pc->p;
-
-       if (p->exec_tail)
-               p->exec_tail->next = e;
-       if (!p->exec_head)
-               p->exec_head = e;
-       p->exec_tail = e;
-       p->exec_size += (e->inst[0] & 1) ? 2 : 1;
-
-       kill_temp_temp(pc, e);
-}
-
-static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
-
-static boolean
-is_long(struct nv50_program_exec *e)
-{
-       if (e->inst[0] & 1)
-               return TRUE;
-       return FALSE;
-}
-
-static boolean
-is_immd(struct nv50_program_exec *e)
-{
-       if (is_long(e) && (e->inst[1] & 3) == 3)
-               return TRUE;
-       return FALSE;
-}
-
-static boolean
-is_join(struct nv50_program_exec *e)
-{
-       if (is_long(e) && (e->inst[1] & 3) == 2)
-               return TRUE;
-       return FALSE;
-}
-
-static INLINE boolean
-is_control_flow(struct nv50_program_exec *e)
-{
-       return (e->inst[0] & 2);
-}
-
-static INLINE void
-set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
-        struct nv50_program_exec *e)
-{
-       assert(!is_immd(e));
-       set_long(pc, e);
-       e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
-       e->inst[1] |= (pred << 7) | (idx << 12);
-}
-
-static INLINE void
-set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
-           struct nv50_program_exec *e)
-{
-       set_long(pc, e);
-       e->inst[1] &= ~((0x3 << 4) | (1 << 6));
-       e->inst[1] |= (idx << 4) | (on << 6);
-}
-
-static INLINE void
-set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
-       if (is_long(e))
-               return;
-
-       e->inst[0] |= 1;
-       set_pred(pc, 0xf, 0, e);
-       set_pred_wr(pc, 0, 0, e);
-}
-
-static INLINE void
-set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
-{
-       if (dst->type == P_RESULT) {
-               set_long(pc, e);
-               e->inst[1] |= 0x00000008;
-       }
-
-       alloc_reg(pc, dst);
-       if (dst->hw > 63)
-               set_long(pc, e);
-       e->inst[0] |= (dst->hw << 2);
-}
-
-static INLINE void
-set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
-{
-       set_long(pc, e);
-       /* XXX: can't be predicated - bits overlap; cases where both
-        * are required should be avoided by using pc->allow32 */
-       set_pred(pc, 0, 0, e);
-       set_pred_wr(pc, 0, 0, e);
-
-       e->inst[1] |= 0x00000002 | 0x00000001;
-       e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16;
-       e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2;
-}
-
-static INLINE void
-set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
-{
-       assert(a->type == P_ADDR);
-
-       assert(!(e->inst[0] & 0x0c000000));
-       assert(!(e->inst[1] & 0x00000004));
-
-       e->inst[0] |= (a->hw & 3) << 26;
-       e->inst[1] |= a->hw & 4;
-}
-
-static void
-emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t);
-
-static void
-emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int);
-
-static void
-emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst,
-                  struct nv50_reg *src)
-{
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[1] = 0x40000000;
-       set_long(pc, e);
-       set_dst(pc, dst, e);
-       set_addr(e, src);
-
-       emit(pc, e);
-}
-
-static void
-emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst,
-                 struct nv50_reg *src0, uint16_t src1_val)
-{
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[0] = 0xd0000000 | (src1_val << 9);
-       e->inst[1] = 0x20000000;
-       set_long(pc, e);
-       e->inst[0] |= dst->hw << 2;
-       if (src0) /* otherwise will add to $a0, which is always 0 */
-               set_addr(e, src0);
-
-       emit(pc, e);
-}
-
-#define INTERP_LINEAR          0
-#define INTERP_FLAT            1
-#define INTERP_PERSPECTIVE     2
-#define INTERP_CENTROID                4
-
-/* interpolant index has been stored in dst->rhw */
-static void
-emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
-               unsigned mode)
-{
-       struct nv50_program_exec *e = exec(pc);
-       assert(dst->rhw != -1);
-
-       e->inst[0] |= 0x80000000;
-       set_dst(pc, dst, e);
-       e->inst[0] |= (dst->rhw << 16);
-
-       if (mode & INTERP_FLAT) {
-               e->inst[0] |= (1 << 8);
-       } else {
-               if (mode & INTERP_PERSPECTIVE) {
-                       e->inst[0] |= (1 << 25);
-                       alloc_reg(pc, iv);
-                       e->inst[0] |= (iv->hw << 9);
-               }
-
-               if (mode & INTERP_CENTROID)
-                       e->inst[0] |= (1 << 24);
-       }
-
-       emit(pc, e);
-}
-
-static void
-set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
-        struct nv50_program_exec *e)
-{
-       set_long(pc, e);
-
-       e->param.index = src->hw & 127;
-       e->param.shift = s;
-       e->param.mask = m << (s % 32);
-
-       if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */
-               set_addr(e, get_address_reg(pc, src));
-       else
-       if (src->acc < 0) {
-               assert(src->type == P_CONST);
-               set_addr(e, pc->addr[src->indirect[0]]);
-       }
-
-       e->inst[1] |= (src->buf_index << 22);
-}
-
-/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */
-static void
-emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[0] = 0x10000000;
-       if (!pc->allow32)
-               set_long(pc, e);
-
-       set_dst(pc, dst, e);
-
-       if (!is_long(e) && src->type == P_IMMD) {
-               set_immd(pc, src, e);
-               /*XXX: 32-bit, but steals part of "half" reg space - need to
-                *     catch and handle this case if/when we do half-regs
-                */
-       } else
-       if (src->type == P_IMMD || src->type == P_CONST) {
-               set_long(pc, e);
-               set_data(pc, src, 0x7f, 9, e);
-               e->inst[1] |= 0x20000000; /* mov from c[] */
-       } else {
-               if (src->type == P_ATTR) {
-                       set_long(pc, e);
-                       e->inst[1] |= 0x00200000;
-
-                       if (src->vtx >= 0) {
-                               /* indirect (vertex base + c) load from p[] */
-                               e->inst[0] |= 0x01800000;
-                               set_addr(e, get_address_reg(pc, src));
-                       }
-               }
-
-               alloc_reg(pc, src);
-               if (src->hw > 63)
-                       set_long(pc, e);
-               e->inst[0] |= (src->hw << 9);
-       }
-
-       if (is_long(e) && !is_immd(e)) {
-               e->inst[1] |= 0x04000000; /* 32-bit */
-               e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */
-               if (!(e->inst[1] & 0x20000000))
-                       e->inst[1] |= 0x00030000; /* lane mask 2:3 */
-       } else
-               e->inst[0] |= 0x00008000;
-
-       emit(pc, e);
-}
-
-static INLINE void
-emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
-{
-       struct nv50_reg *imm = alloc_immd(pc, f);
-       emit_mov(pc, dst, imm);
-       FREE(imm);
-}
-
-/* Assign the hw of the discarded temporary register src
- * to the tgsi register dst and free src.
- */
-static void
-assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
-       assert(src->index == -1 && src->hw != -1);
-
-       if (pc->if_lvl || pc->loop_lvl ||
-           (dst->type != P_TEMP) ||
-           (src->hw < pc->result_nr * 4 &&
-            pc->p->type == PIPE_SHADER_FRAGMENT) ||
-           pc->p->info.opcode_count[TGSI_OPCODE_CAL] ||
-           pc->p->info.opcode_count[TGSI_OPCODE_BRA]) {
-
-               emit_mov(pc, dst, src);
-               free_temp(pc, src);
-               return;
-       }
-
-       if (dst->hw != -1)
-               pc->r_temp[dst->hw] = NULL;
-       pc->r_temp[src->hw] = dst;
-       dst->hw = src->hw;
-
-       FREE(src);
-}
-
-static void
-emit_nop(struct nv50_pc *pc)
-{
-       struct nv50_program_exec *e = exec(pc);
-
-       e->inst[0] = 0xf0000000;
-       set_long(pc, e);
-       e->inst[1] = 0xe0000000;
-       emit(pc, e);
-}
-
-static boolean
-check_swap_src_0_1(struct nv50_pc *pc,
-                  struct nv50_reg **s0, struct nv50_reg **s1)
-{
-       struct nv50_reg *src0 = *s0, *src1 = *s1;
-
-       if (src0->type == P_CONST) {
-               if (src1->type != P_CONST) {
-                       *s0 = src1;
-                       *s1 = src0;
-                       return TRUE;
-               }
-       } else
-       if (src1->type == P_ATTR) {
-               if (src0->type != P_ATTR) {
-                       *s0 = src1;
-                       *s1 = src0;
-                       return TRUE;
-               }
-       }
-
-       return FALSE;
-}
-
-static void
-set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src,
-                    struct nv50_program_exec *e)
-{
-       struct nv50_reg *temp;
-
-       if (src->type != P_TEMP) {
-               temp = temp_temp(pc, e);
-               emit_mov(pc, temp, src);
-               src = temp;
-       }
-
-       alloc_reg(pc, src);
-       if (src->hw > 63)
-               set_long(pc, e);
-       e->inst[0] |= (src->hw << 9);
-}
-
-static void
-set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
-{
-       if (src->type == P_ATTR) {
-               set_long(pc, e);
-               e->inst[1] |= 0x00200000;
-
-               if (src->vtx >= 0) {
-                       e->inst[0] |= 0x01800000; /* src from p[] */
-                       set_addr(e, get_address_reg(pc, src));
-               }
-       } else
-       if (src->type == P_CONST || src->type == P_IMMD) {
-               struct nv50_reg *temp = temp_temp(pc, e);
-
-               emit_mov(pc, temp, src);
-               src = temp;
-       }
-
-       alloc_reg(pc, src);
-       if (src->hw > 63)
-               set_long(pc, e);
-       e->inst[0] |= (src->hw << 9);
-}
-
-static void
-set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
-{
-       if (src->type == P_ATTR) {
-               struct nv50_reg *temp = temp_temp(pc, e);
-
-               emit_mov(pc, temp, src);
-               src = temp;
-       } else
-       if (src->type == P_CONST || src->type == P_IMMD) {
-               if (e->inst[0] & 0x01800000) {
-                       struct nv50_reg *temp = temp_temp(pc, e);
-
-                       emit_mov(pc, temp, src);
-                       src = temp;
-               } else {
-                       assert(!(e->inst[0] & 0x00800000));
-                       set_data(pc, src, 0x7f, 16, e);
-                       e->inst[0] |= 0x00800000;
-               }
-       }
-
-       alloc_reg(pc, src);
-       if (src->hw > 63)
-               set_long(pc, e);
-       e->inst[0] |= ((src->hw & 127) << 16);
-}
-
-static void
-set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
-{
-       set_long(pc, e);
-
-       if (src->type == P_ATTR) {
-               struct nv50_reg *temp = temp_temp(pc, e);
-
-               emit_mov(pc, temp, src);
-               src = temp;
-       } else
-       if (src->type == P_CONST || src->type == P_IMMD) {
-               if (e->inst[0] & 0x01800000) {
-                       struct nv50_reg *temp = temp_temp(pc, e);
-
-                       emit_mov(pc, temp, src);
-                       src = temp;
-               } else {
-                       assert(!(e->inst[0] & 0x01000000));
-                       set_data(pc, src, 0x7f, 32+14, e);
-                       e->inst[0] |= 0x01000000;
-               }
-       }
-
-       alloc_reg(pc, src);
-       e->inst[1] |= ((src->hw & 127) << 14);
-}
-
-static void
-set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh,
-            struct nv50_program_exec *e, int pos)
-{
-       struct nv50_reg *r = src;
-
-       alloc_reg(pc, r);
-       if (r->type != P_TEMP) {
-