Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / drivers / nvfx / nvfx_screen.c
index 42094227e1c0c3e23a0cf7a8bdab38d3195878db..4a97dfb9c252f7dad373d448c969d4bf23692cea 100644 (file)
@@ -1,18 +1,20 @@
 #include "pipe/p_screen.h"
 #include "pipe/p_state.h"
+#include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_simple_screen.h"
 
 #include "nouveau/nouveau_screen.h"
-
+#include "nouveau/nv_object.xml.h"
 #include "nvfx_context.h"
+#include "nvfx_video_context.h"
 #include "nvfx_screen.h"
 #include "nvfx_resource.h"
 #include "nvfx_tex.h"
 
-#define NV30TCL_CHIPSET_3X_MASK 0x00000003
-#define NV34TCL_CHIPSET_3X_MASK 0x00000010
-#define NV35TCL_CHIPSET_3X_MASK 0x000001e0
+#define NV30_3D_CHIPSET_3X_MASK 0x00000003
+#define NV34_3D_CHIPSET_3X_MASK 0x00000010
+#define NV35_3D_CHIPSET_3X_MASK 0x000001e0
 
 #define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
 #define NV4X_GRCLASS4497_CHIPSETS 0x00005450
@@ -27,7 +29,7 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
                return 16;
        case PIPE_CAP_NPOT_TEXTURES:
-               return !!screen->is_nv4x;
+               return screen->advertise_npot;
        case PIPE_CAP_TWO_SIDED_STENCIL:
                return 1;
        case PIPE_CAP_GLSL:
@@ -37,7 +39,7 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_POINT_SPRITE:
                return 1;
        case PIPE_CAP_MAX_RENDER_TARGETS:
-               return screen->is_nv4x ? 4 : 2;
+               return screen->use_nv4x ? 4 : 1;
        case PIPE_CAP_OCCLUSION_QUERY:
                return 1;
         case PIPE_CAP_TIMER_QUERY:
@@ -53,15 +55,13 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
                return 13;
        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-               return !!screen->is_nv4x;
+               return !!screen->use_nv4x;
        case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
                return 1;
        case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
                return 0; /* We have 4 on nv40 - but unsupported currently */
-       case PIPE_CAP_TGSI_CONT_SUPPORTED:
-               return 0;
        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
-               return !!screen->is_nv4x;
+               return screen->advertise_blend_equation_separate;
        case PIPE_CAP_MAX_COMBINED_SAMPLERS:
                return 16;
        case PIPE_CAP_INDEP_BLEND_ENABLE:
@@ -74,60 +74,111 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
                return 0;
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-               return 1;
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
-               return 0;
-       case PIPE_CAP_MAX_FS_INSTRUCTIONS:
-       case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
-       case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
-       case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
-               return 4096;
-       case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
-               /* FIXME: is it the dynamic (nv30:0/nv40:24) or the static
-                  value (nv30:0/nv40:4) ? */
-               return screen->is_nv4x ? 4 : 0;
-       case PIPE_CAP_MAX_FS_INPUTS:
-               return 10;
-       case PIPE_CAP_MAX_FS_CONSTS:
-               return screen->is_nv4x ? 224 : 32;
-       case PIPE_CAP_MAX_FS_TEMPS:
-               return 32;
-       case PIPE_CAP_MAX_FS_ADDRS:
-               return screen->is_nv4x ? 1 : 0;
-       case PIPE_CAP_MAX_FS_PREDS:
-               return screen->is_nv4x ? 1 : 0;
-       case PIPE_CAP_MAX_VS_INSTRUCTIONS:
-       case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
-               return screen->is_nv4x ? 512 : 256;
-       case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
-       case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
-               return screen->is_nv4x ? 512 : 0;
-       case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
-               /* FIXME: is it the dynamic (nv30:24/nv40:24) or the static
-                  value (nv30:1/nv40:4) ? */
-               return screen->is_nv4x ? 4 : 1;
-       case PIPE_CAP_MAX_VS_INPUTS:
-               return 16;
-       case PIPE_CAP_MAX_VS_CONSTS:
-               /* XXX: currently more don't work, but it should be possible to make it work */
-               return 212 - 6;
-       case PIPE_CAP_MAX_VS_TEMPS:
-               return screen->is_nv4x ? 32 : 13;
-       case PIPE_CAP_MAX_VS_ADDRS:
-               return 2;
-       case PIPE_CAP_MAX_VS_PREDS:
-               return screen->is_nv4x ? 1 : 0;
-       case PIPE_CAP_GEOMETRY_SHADER4:
-               return 0;
+               return 1;
        case PIPE_CAP_DEPTH_CLAMP:
                return 0; // TODO: implement depth clamp
+       case PIPE_CAP_PRIMITIVE_RESTART:
+               return 0; // TODO: implement primitive restart
+       case PIPE_CAP_SHADER_STENCIL_EXPORT:
+               return 0;
+       case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+                return 0;
        default:
-               NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+               NOUVEAU_ERR("Warning: unknown PIPE_CAP %d\n", param);
                return 0;
        }
 }
 
+static int
+nvfx_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param)
+{ /* Returns the value of shader cap `param` for stage `shader`; any stage or cap not listed below yields 0. */
+       struct nvfx_screen *screen = nvfx_screen(pscreen);
+
+       switch(shader) {
+       case PIPE_SHADER_FRAGMENT: /* fragment-stage limits; several depend on screen->use_nv4x */
+               switch(param) {
+               case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+               case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+               case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+               case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+                       return 4096; /* same value for all four instruction-count caps */
+               case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+                       /* FIXME: is it the dynamic (nv30:0/nv40:24) or the static
+                        value (nv30:0/nv40:4) ? */
+                       return screen->use_nv4x ? 4 : 0; /* currently reports the "static" figures from the FIXME above */
+               case PIPE_SHADER_CAP_MAX_INPUTS:
+                       return screen->use_nv4x ? 12 : 10;
+               case PIPE_SHADER_CAP_MAX_CONSTS:
+                       return screen->use_nv4x ? 224 : 32;
+               case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+                   return 1;
+               case PIPE_SHADER_CAP_MAX_TEMPS:
+                       return 32;
+               case PIPE_SHADER_CAP_MAX_ADDRS:
+                       return screen->use_nv4x ? 1 : 0;
+               case PIPE_SHADER_CAP_MAX_PREDS:
+                       return 0; /* we could expose these, but nothing uses them */
+               case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+                   return 0; /* reported as unsupported for fragment shaders (the vertex case below returns 1) */
+               case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+               case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+               case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+               case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+                       return 0; /* no indirect addressing of any register file is advertised for fragment shaders */
+               case PIPE_SHADER_CAP_SUBROUTINES:
+                       return screen->use_nv4x ? 1 : 0;
+               default:
+                       break; /* unlisted cap: falls out to the final return 0 */
+               }
+               break;
+       case PIPE_SHADER_VERTEX: /* vertex-stage limits */
+               switch(param) {
+               case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+               case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+                       return screen->use_nv4x ? 512 : 256;
+               case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+               case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+                       return screen->use_nv4x ? 512 : 0;
+               case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+                       /* FIXME: is it the dynamic (nv30:24/nv40:24) or the static
+                        value (nv30:1/nv40:4) ? */
+                       return screen->use_nv4x ? 4 : 1; /* currently reports the "static" figures from the FIXME above */
+               case PIPE_SHADER_CAP_MAX_INPUTS:
+                       return 16;
+               case PIPE_SHADER_CAP_MAX_CONSTS:
+                       /* The "- 6" reserves slots for clip planes; Gallium should be fixed to put
+                        * them in the vertex shader itself, so that we don't need to reserve these. */
+                       return (screen->use_nv4x ? 468 : 256) - 6;
+                    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+                           return 1;
+               case PIPE_SHADER_CAP_MAX_TEMPS:
+                       return screen->use_nv4x ? 32 : 13;
+               case PIPE_SHADER_CAP_MAX_ADDRS:
+                       return 2;
+               case PIPE_SHADER_CAP_MAX_PREDS:
+                       return 0; /* we could expose these, but nothing uses them */
+               case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+                        return 1;
+               case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+               case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+               case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+                       return 0;
+               case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+                       return 1; /* constants are the only register file advertised as indirectly addressable */
+               case PIPE_SHADER_CAP_SUBROUTINES:
+                       return 1;
+               default:
+                       break; /* unlisted cap: falls out to the final return 0 */
+               }
+               break;
+       default:
+               break; /* any stage other than fragment/vertex reports 0 for every cap */
+       }
+       return 0; /* shared fallback for unknown stages and unlisted caps; no error is logged here */
+}
+
 static float
 nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
 {
@@ -141,9 +192,9 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_POINT_WIDTH_AA:
                return 64.0;
        case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
-               return screen->is_nv4x ? 16.0 : 8.0;
+               return screen->use_nv4x ? 16.0 : 8.0;
        case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
-               return screen->is_nv4x ? 16.0 : 4.0;
+               return 15.0;
        default:
                NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
                return 0.0;
@@ -155,10 +206,13 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
                                     enum pipe_format format,
                                     enum pipe_texture_target target,
                                     unsigned sample_count,
-                                    unsigned bind, unsigned geom_flags)
+                                     unsigned bind)
 {
        struct nvfx_screen *screen = nvfx_screen(pscreen);
 
+        if (!util_format_is_supported(format, bind))
+                return FALSE;
+
         if (sample_count > 1)
                return FALSE;
 
@@ -166,8 +220,18 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
                switch (format) {
                case PIPE_FORMAT_B8G8R8A8_UNORM:
                case PIPE_FORMAT_B8G8R8X8_UNORM:
+               case PIPE_FORMAT_R8G8B8A8_UNORM:
+               case PIPE_FORMAT_R8G8B8X8_UNORM:
                case PIPE_FORMAT_B5G6R5_UNORM:
                        break;
+               case PIPE_FORMAT_R16G16B16A16_FLOAT:
+                       if(!screen->advertise_fp16)
+                               return FALSE;
+                       break;
+               case PIPE_FORMAT_R32G32B32A32_FLOAT:
+                       if(!screen->advertise_fp32)
+                               return FALSE;
+                       break;
                default:
                        return FALSE;
                }
@@ -188,8 +252,11 @@ nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
                struct nvfx_texture_format* tf = &nvfx_texture_formats[format];
                if(util_format_is_s3tc(format) && !util_format_s3tc_enabled)
                        return FALSE;
-
-               if(screen->is_nv4x)
+               if(format == PIPE_FORMAT_R16G16B16A16_FLOAT && !screen->advertise_fp16)
+                       return FALSE;
+               if(format == PIPE_FORMAT_R32G32B32A32_FLOAT && !screen->advertise_fp32)
+                       return FALSE;
+               if(screen->use_nv4x)
                {
                        if(tf->fmt[4] < 0)
                                return FALSE;
@@ -241,93 +308,101 @@ nvfx_screen_destroy(struct pipe_screen *pscreen)
 static void nv30_screen_init(struct nvfx_screen *screen)
 { /* Queues nv30 static 3D-engine setup on the screen's channel; each BEGIN_RING(..., n) below is followed by n OUT_RING data words. */
        struct nouveau_channel *chan = screen->base.channel;
+       struct nouveau_grobj *eng3d = screen->eng3d;
        int i;
 
        /* TODO: perhaps we should do some of this on nv40 too? */
        for (i=1; i<8; i++) { /* writes 0 to VIEWPORT_CLIP_HORIZ/VERT for indices 1..7; index 0 is not touched here */
-               OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1));
+               BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_HORIZ(i), 1);
                OUT_RING(chan, 0);
-               OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_VERT(i), 1));
+               BEGIN_RING(chan, eng3d, NV30_3D_VIEWPORT_CLIP_VERT(i), 1);
                OUT_RING(chan, 0);
        }
 
-       OUT_RING(chan, RING_3D(0x220, 1));
+       BEGIN_RING(chan, eng3d, 0x220, 1); /* NOTE(review): this and the other raw hex offsets below have no symbolic name here; their meaning is not documented in this file */
        OUT_RING(chan, 1);
 
-       OUT_RING(chan, RING_3D(0x03b0, 1));
+       BEGIN_RING(chan, eng3d, 0x03b0, 1);
        OUT_RING(chan, 0x00100000);
-       OUT_RING(chan, RING_3D(0x1454, 1));
+       BEGIN_RING(chan, eng3d, 0x1454, 1);
        OUT_RING(chan, 0);
-       OUT_RING(chan, RING_3D(0x1d80, 1));
+       BEGIN_RING(chan, eng3d, 0x1d80, 1);
        OUT_RING(chan, 3);
-       OUT_RING(chan, RING_3D(0x1450, 1));
+       BEGIN_RING(chan, eng3d, 0x1450, 1);
        OUT_RING(chan, 0x00030004);
 
        /* NEW */
-       OUT_RING(chan, RING_3D(0x1e98, 1));
+       BEGIN_RING(chan, eng3d, 0x1e98, 1);
        OUT_RING(chan, 0);
-       OUT_RING(chan, RING_3D(0x17e0, 3));
+       BEGIN_RING(chan, eng3d, 0x17e0, 3); /* three float words: 0.0, 0.0, 1.0 */
        OUT_RING(chan, fui(0.0));
        OUT_RING(chan, fui(0.0));
        OUT_RING(chan, fui(1.0));
-       OUT_RING(chan, RING_3D(0x1f80, 16));
+       BEGIN_RING(chan, eng3d, 0x1f80, 16); /* 16 words: all zero except word 8, which is 0x0000ffff */
        for (i=0; i<16; i++) {
                OUT_RING(chan, (i==8) ? 0x0000ffff : 0);
        }
 
-       OUT_RING(chan, RING_3D(0x120, 3));
+       BEGIN_RING(chan, eng3d, 0x120, 3);
        OUT_RING(chan, 0);
        OUT_RING(chan, 1);
        OUT_RING(chan, 2);
 
-       OUT_RING(chan, RING_3D(0x1d88, 1));
+       BEGIN_RING(chan, eng3d, 0x1d88, 1);
        OUT_RING(chan, 0x00001200);
 
-       OUT_RING(chan, RING_3D(NV34TCL_RC_ENABLE, 1));
+       BEGIN_RING(chan, eng3d, NV30_3D_RC_ENABLE, 1);
        OUT_RING(chan, 0);
 
-       OUT_RING(chan, RING_3D(NV34TCL_DEPTH_RANGE_NEAR, 2));
+       BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_RANGE_NEAR, 2); /* two consecutive floats, 0.0 then 1.0 -- presumably near then far; TODO confirm */
        OUT_RING(chan, fui(0.0));
        OUT_RING(chan, fui(1.0));
 
-       OUT_RING(chan, RING_3D(NV34TCL_MULTISAMPLE_CONTROL, 1));
+       BEGIN_RING(chan, eng3d, NV30_3D_MULTISAMPLE_CONTROL, 1);
        OUT_RING(chan, 0xffff0000);
 
        /* enables use of the vertex program (vp) rather than fixed-function somehow */
-       OUT_RING(chan, RING_3D(0x1e94, 1));
+       BEGIN_RING(chan, eng3d, 0x1e94, 1);
        OUT_RING(chan, 0x13);
 }
 
 static void nv40_screen_init(struct nvfx_screen *screen)
 { /* Queues nv40 static 3D-engine setup on the screen's channel; each BEGIN_RING(..., n) below is followed by n OUT_RING data words. */
        struct nouveau_channel *chan = screen->base.channel;
+       struct nouveau_grobj *eng3d = screen->eng3d;
 
-       OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 2));
+       BEGIN_RING(chan, eng3d, NV40_3D_DMA_COLOR2, 2); /* two words starting at DMA_COLOR2; both receive the channel's VRAM handle */
        OUT_RING(chan, screen->base.channel->vram->handle);
        OUT_RING(chan, screen->base.channel->vram->handle);
 
-       OUT_RING(chan, RING_3D(0x1ea4, 3));
+       BEGIN_RING(chan, eng3d, 0x1450, 1); /* write newly added by this change; NOTE(review): raw offset, meaning not documented in this file */
+       OUT_RING(chan, 0x00000004);
+
+       BEGIN_RING(chan, eng3d, 0x1ea4, 3);
        OUT_RING(chan, 0x00000010);
        OUT_RING(chan, 0x01000100);
        OUT_RING(chan, 0xff800006);
 
        /* vtxprog output routing */
-       OUT_RING(chan, RING_3D(0x1fc4, 1));
+       BEGIN_RING(chan, eng3d, 0x1fc4, 1);
        OUT_RING(chan, 0x06144321);
-       OUT_RING(chan, RING_3D(0x1fc8, 2));
+       BEGIN_RING(chan, eng3d, 0x1fc8, 2);
        OUT_RING(chan, 0xedcba987);
-       OUT_RING(chan, 0x00000021);
-       OUT_RING(chan, RING_3D(0x1fd0, 1));
+       OUT_RING(chan, 0x0000006f); /* second routing word; replaces the previous 0x00000021 */
+       BEGIN_RING(chan, eng3d, 0x1fd0, 1);
        OUT_RING(chan, 0x00171615);
-       OUT_RING(chan, RING_3D(0x1fd4, 1));
+       BEGIN_RING(chan, eng3d, 0x1fd4, 1);
        OUT_RING(chan, 0x001b1a19);
 
-       OUT_RING(chan, RING_3D(0x1ef8, 1));
+       BEGIN_RING(chan, eng3d, 0x1ef8, 1);
        OUT_RING(chan, 0x0020ffff);
-       OUT_RING(chan, RING_3D(0x1d64, 1));
-       OUT_RING(chan, 0x00d30000);
-       OUT_RING(chan, RING_3D(0x1e94, 1));
+       BEGIN_RING(chan, eng3d, 0x1d64, 1);
+       OUT_RING(chan, 0x01d300d4); /* replaces the previous 0x00d30000 */
+       BEGIN_RING(chan, eng3d, 0x1e94, 1);
        OUT_RING(chan, 0x00000001);
+
+       BEGIN_RING(chan, eng3d, NV40_3D_MIPMAP_ROUNDING, 1); /* new in this change: sets the mipmap rounding mode to MODE_DOWN */
+       OUT_RING(chan, NV40_3D_MIPMAP_ROUNDING_MODE_DOWN);
 }
 
 static unsigned
@@ -344,19 +419,6 @@ nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen)
                vram_hack_default = 1;
        vram_hack = debug_get_bool_option("NOUVEAU_VTXIDX_IN_VRAM", vram_hack_default);
 
-#ifdef DEBUG
-       if(!vram_hack)
-       {
-               fprintf(stderr, "Some systems may experience graphics corruption due to randomly misplaced vertices.\n"
-                       "If this is happening, export NOUVEAU_VTXIDX_IN_VRAM=1 may reduce or eliminate the problem\n");
-       }
-       else
-       {
-               fprintf(stderr, "A performance reducing hack is being used to help avoid graphics corruption.\n"
-                       "You can try export NOUVEAU_VTXIDX_IN_VRAM=0 to disable it.\n");
-       }
-#endif
-
        return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART;
 }
 
@@ -396,29 +458,31 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        pscreen->winsys = ws;
        pscreen->destroy = nvfx_screen_destroy;
        pscreen->get_param = nvfx_screen_get_param;
+       pscreen->get_shader_param = nvfx_screen_get_shader_param;
        pscreen->get_paramf = nvfx_screen_get_paramf;
        pscreen->is_format_supported = nvfx_screen_is_format_supported;
        pscreen->context_create = nvfx_create;
+       pscreen->video_context_create = nvfx_video_create;
 
        switch (dev->chipset & 0xf0) {
        case 0x30:
-               if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
-                       eng3d_class = 0x0397;
-               else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
-                       eng3d_class = 0x0697;
-               else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
-                       eng3d_class = 0x0497;
+               if (NV30_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
+                       eng3d_class = NV30_3D;
+               else if (NV34_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
+                       eng3d_class = NV34_3D;
+               else if (NV35_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f)))
+                       eng3d_class = NV35_3D;
                break;
        case 0x40:
                if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f)))
-                       eng3d_class = NV40TCL;
+                       eng3d_class = NV40_3D;
                else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
-                       eng3d_class = NV44TCL;
+                       eng3d_class = NV44_3D;
                screen->is_nv4x = ~0;
                break;
        case 0x60:
                if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f)))
-                       eng3d_class = NV44TCL;
+                       eng3d_class = NV44_3D;
                screen->is_nv4x = ~0;
                break;
        }
@@ -428,17 +492,37 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                return NULL;
        }
 
-       screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE);
+       screen->advertise_npot = !!screen->is_nv4x;
+       screen->advertise_blend_equation_separate = !!screen->is_nv4x;
+       screen->use_nv4x = screen->is_nv4x;
+
+       if(screen->is_nv4x) {
+               if(debug_get_bool_option("NVFX_SIMULATE_NV30", FALSE))
+                       screen->use_nv4x = 0;
+               if(!debug_get_bool_option("NVFX_NPOT", TRUE))
+                       screen->advertise_npot = 0;
+               if(!debug_get_bool_option("NVFX_BLEND_EQ_SEP", TRUE))
+                       screen->advertise_blend_equation_separate = 0;
+       }
+
+       screen->force_swtnl = debug_get_bool_option("NVFX_SWTNL", FALSE);
        screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);
 
        screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384);
        screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0"));
        screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0"));
 
+       /* We don't advertise these by default because filtering and blending don't work as
+        * they should, due to several restrictions.
+        * The only exception is fp16 on nv40.
+        */
+       screen->advertise_fp16 = debug_get_bool_option("NVFX_FP16", !!screen->use_nv4x);
+       screen->advertise_fp32 = debug_get_bool_option("NVFX_FP32", 0);
+
        screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen);
 
        /* surely both nv3x and nv44 support index buffers too: find out how and test that */
-       if(eng3d_class == NV40TCL)
+       if(eng3d_class == NV40_3D)
                screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags;
 
        if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags)
@@ -487,14 +571,8 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        LIST_INITHEAD(&screen->query_list);
 
        /* Vtxprog resources */
-       if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) ||
-           /* XXX: this should actually be 468 or 256, but apparently indirect addressing
-            * cannot read consts starting from 212 on nv40.
-            * It looks like 44 slots are reserved for something, and there is a "mode switch"
-            * from 256 slots to 512 slots that we are setting to "256 mode" on nv40, leading
-            * to 212 = 256 - 44 instead of 468 = 512 - 44 usable slots.
-            */
-           nouveau_resource_init(&screen->vp_data_heap, 0, 212)) {
+       if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->use_nv4x ? 512 : 256) ||
+           nouveau_resource_init(&screen->vp_data_heap, 0, screen->use_nv4x ? 468 : 256)) {
                nvfx_screen_destroy(pscreen);
                return NULL;
        }
@@ -503,25 +581,25 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 
        /* Static eng3d initialisation */
        /* note that we just started using the channel, so we must have space in the pushbuffer */
-       OUT_RING(chan, RING_3D(NV34TCL_DMA_NOTIFY, 1));
+       BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_NOTIFY, 1);
        OUT_RING(chan, screen->sync->handle);
-       OUT_RING(chan, RING_3D(NV34TCL_DMA_TEXTURE0, 2));
+       BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_TEXTURE0, 2);
        OUT_RING(chan, chan->vram->handle);
        OUT_RING(chan, chan->gart->handle);
-       OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1));
+       BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR1, 1);
        OUT_RING(chan, chan->vram->handle);
-       OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 2));
+       BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_COLOR0, 2);
        OUT_RING(chan, chan->vram->handle);
        OUT_RING(chan, chan->vram->handle);
-       OUT_RING(chan, RING_3D(NV34TCL_DMA_VTXBUF0, 2));
+       BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_VTXBUF0, 2);
        OUT_RING(chan, chan->vram->handle);
        OUT_RING(chan, chan->gart->handle);
 
-       OUT_RING(chan, RING_3D(NV34TCL_DMA_FENCE, 2));
+       BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_FENCE, 2);
        OUT_RING(chan, 0);
        OUT_RING(chan, screen->query->handle);
 
-       OUT_RING(chan, RING_3D(NV34TCL_DMA_IN_MEMORY7, 2));
+       BEGIN_RING(chan, screen->eng3d, NV30_3D_DMA_UNK1AC, 2);
        OUT_RING(chan, chan->vram->handle);
        OUT_RING(chan, chan->vram->handle);