nv50: rework state emission
[mesa.git] / src / gallium / drivers / nv50 / nv50_screen.c
index c672ea471a396c50230df4613f6de3ca9d8454b5..b530de64b6e044a6f2c0457e3e5cf160b56b8a48 100644 (file)
@@ -35,9 +35,9 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 {
        if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
                switch (format) {
-               case PIPE_FORMAT_X8R8G8B8_UNORM:
-               case PIPE_FORMAT_A8R8G8B8_UNORM:
-               case PIPE_FORMAT_R5G6B5_UNORM:
+               case PIPE_FORMAT_B8G8R8X8_UNORM:
+               case PIPE_FORMAT_B8G8R8A8_UNORM:
+               case PIPE_FORMAT_B5G6R5_UNORM:
                case PIPE_FORMAT_R16G16B16A16_SNORM:
                case PIPE_FORMAT_R16G16B16A16_UNORM:
                case PIPE_FORMAT_R32G32B32A32_FLOAT:
@@ -51,30 +51,31 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
        if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
                switch (format) {
                case PIPE_FORMAT_Z32_FLOAT:
-               case PIPE_FORMAT_Z24S8_UNORM:
-               case PIPE_FORMAT_X8Z24_UNORM:
                case PIPE_FORMAT_S8Z24_UNORM:
+               case PIPE_FORMAT_Z24X8_UNORM:
+               case PIPE_FORMAT_Z24S8_UNORM:
                        return TRUE;
                default:
                        break;
                }
        } else {
                switch (format) {
-               case PIPE_FORMAT_A8R8G8B8_UNORM:
-               case PIPE_FORMAT_X8R8G8B8_UNORM:
-               case PIPE_FORMAT_A8R8G8B8_SRGB:
-               case PIPE_FORMAT_X8R8G8B8_SRGB:
-               case PIPE_FORMAT_A1R5G5B5_UNORM:
-               case PIPE_FORMAT_A4R4G4B4_UNORM:
-               case PIPE_FORMAT_R5G6B5_UNORM:
+               case PIPE_FORMAT_B8G8R8A8_UNORM:
+               case PIPE_FORMAT_B8G8R8X8_UNORM:
+               case PIPE_FORMAT_B8G8R8A8_SRGB:
+               case PIPE_FORMAT_B8G8R8X8_SRGB:
+               case PIPE_FORMAT_B5G5R5A1_UNORM:
+               case PIPE_FORMAT_B4G4R4A4_UNORM:
+               case PIPE_FORMAT_B5G6R5_UNORM:
                case PIPE_FORMAT_L8_UNORM:
                case PIPE_FORMAT_A8_UNORM:
                case PIPE_FORMAT_I8_UNORM:
-               case PIPE_FORMAT_A8L8_UNORM:
+               case PIPE_FORMAT_L8A8_UNORM:
                case PIPE_FORMAT_DXT1_RGB:
                case PIPE_FORMAT_DXT1_RGBA:
                case PIPE_FORMAT_DXT3_RGBA:
                case PIPE_FORMAT_DXT5_RGBA:
+               case PIPE_FORMAT_S8Z24_UNORM:
                case PIPE_FORMAT_Z24S8_UNORM:
                case PIPE_FORMAT_Z32_FLOAT:
                case PIPE_FORMAT_R16G16B16A16_SNORM:
@@ -97,6 +98,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
        switch (param) {
        case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
                return 32;
+       case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+               return 32;
+       case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+               return 64;
        case PIPE_CAP_NPOT_TEXTURES:
                return 1;
        case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -122,15 +127,23 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
        case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
        case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
                return 1;
-       case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-               return 0;
        case PIPE_CAP_TGSI_CONT_SUPPORTED:
-               return 0;
+               return 1;
        case PIPE_CAP_BLEND_EQUATION_SEPARATE:
                return 1;
        case NOUVEAU_CAP_HW_VTXBUF:
                return 1;
        case NOUVEAU_CAP_HW_IDXBUF:
+               return 1;
+       case PIPE_CAP_INDEP_BLEND_ENABLE:
+               return 1;
+       case PIPE_CAP_INDEP_BLEND_FUNC:
+               return 0;
+       case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+               return 1;
+       case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
                return 0;
        default:
                NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
@@ -162,15 +175,55 @@ static void
 nv50_screen_destroy(struct pipe_screen *pscreen) /* release everything the nv50 screen owns, then free it */
 {
        struct nv50_screen *screen = nv50_screen(pscreen);
+       unsigned i;
+
+       for (i = 0; i < 3; i++) { /* one parameter constbuf per shader type (VP/GP/FP) */
+               if (screen->constbuf_parm[i]) /* also reached from create's error paths, so may be unset */
+                       nouveau_bo_ref(NULL, &screen->constbuf_parm[i]);
+       }
+
+       if (screen->constbuf_misc[0])
+               nouveau_bo_ref(NULL, &screen->constbuf_misc[0]);
+       if (screen->tic)
+               nouveau_bo_ref(NULL, &screen->tic);
+       if (screen->tsc)
+               nouveau_bo_ref(NULL, &screen->tsc);
+       if (screen->static_init)
+               so_ref(NULL, &screen->static_init); /* stateobj reference taken in nv50_screen_create */
 
        nouveau_notifier_free(&screen->sync);
        nouveau_grobj_free(&screen->tesla);
        nouveau_grobj_free(&screen->eng2d);
        nouveau_grobj_free(&screen->m2mf);
+       nouveau_resource_destroy(&screen->immd_heap[0]);
+       nouveau_resource_destroy(&screen->parm_heap[0]);
+       nouveau_resource_destroy(&screen->parm_heap[1]); /* NOTE(review): 3 constbuf_parm BOs but only 2 parm heaps freed - confirm */
        nouveau_screen_fini(&screen->base);
        FREE(screen); /* screen must not be touched after this point */
 }
 
+static int /* hook installed as pre_pipebuffer_map_callback; always returns 0 */
+nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
+       unsigned usage) /* 'usage' is not read by this function */
+{
+       struct nv50_screen *screen = nv50_screen(pscreen);
+       struct nv50_context *ctx = screen->cur_ctx; /* NULL-checked before use below */
+
+       if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
+               return 0; /* buffers without the VERTEX usage flag need no handling */
+
+       /* This vertex buffer is being mapped, so it can no longer be considered
+        * part of current state; drop the state object that relocs it to avoid
+        * emitting reloc markers. NOTE(review): hw[17] is presumably the vtxbuf slot. */
+       if (ctx && ctx->state.hw[17] && so_bo_is_reloc(ctx->state.hw[17],
+                       nouveau_bo(pb))) {
+               so_ref(NULL, &ctx->state.hw[17]); /* clear the cached state object */
+               ctx->dirty |= NV50_NEW_ARRAYS; /* NOTE(review): presumably makes array state get rebuilt - confirm */
+       }
+
+       return 0;
+}
+
 struct pipe_screen *
 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 {
@@ -198,6 +251,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        pscreen->get_param = nv50_screen_get_param;
        pscreen->get_paramf = nv50_screen_get_paramf;
        pscreen->is_format_supported = nv50_screen_is_format_supported;
+       pscreen->context_create = nv50_create;
+       screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
 
        nv50_screen_init_miptree_functions(pscreen);
        nv50_transfer_init_screen_functions(pscreen);
@@ -210,7 +265,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                nv50_screen_destroy(pscreen);
                return NULL;
        }
-       BIND_RING(chan, screen->m2mf, 1);
 
        /* 2D object */
        ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d);
@@ -219,7 +273,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                nv50_screen_destroy(pscreen);
                return NULL;
        }
-       BIND_RING(chan, screen->eng2d, 2);
 
        /* 3D object */
        switch (chipset & 0xf0) {
@@ -228,8 +281,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                break;
        case 0x80:
        case 0x90:
-               /* this stupid name should be corrected. */
-               tesla_class = NV54TCL;
+               tesla_class = NV84TCL;
                break;
        case 0xa0:
                switch (chipset) {
@@ -239,7 +291,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                        tesla_class = NVA0TCL;
                        break;
                default:
-                       tesla_class = 0x8597;
+                       tesla_class = NVA8TCL;
                        break;
                }
                break;
@@ -256,7 +308,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                nv50_screen_destroy(pscreen);
                return NULL;
        }
-       BIND_RING(chan, screen->tesla, 3);
 
        /* Sync notifier */
        ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
@@ -267,7 +318,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        }
 
        /* Static M2MF init */
-       so = so_new(32, 0);
+       so = so_new(1, 3, 0);
        so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
        so_data  (so, screen->sync->handle);
        so_data  (so, chan->vram->handle);
@@ -276,7 +327,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_ref (NULL, &so);
 
        /* Static 2D init */
-       so = so_new(64, 0);
+       so = so_new(4, 7, 0);
        so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
        so_data  (so, screen->sync->handle);
        so_data  (so, chan->vram->handle);
@@ -284,7 +335,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_data  (so, chan->vram->handle);
        so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
        so_data  (so, NV50_2D_OPERATION_SRCCOPY);
-       so_method(so, screen->eng2d, 0x0290, 1);
+       so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1);
        so_data  (so, 0);
        so_method(so, screen->eng2d, 0x0888, 1);
        so_data  (so, 1);
@@ -292,27 +343,36 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_ref(NULL, &so);
 
        /* Static tesla init */
-       so = so_new(256, 20);
+       so = so_new(47, 95, 24);
 
-       so_method(so, screen->tesla, 0x1558, 1);
-       so_data  (so, 1);
+       so_method(so, screen->tesla, NV50TCL_COND_MODE, 1);
+       so_data  (so, NV50TCL_COND_MODE_ALWAYS);
        so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
        so_data  (so, screen->sync->handle);
-       so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
-                                    NV50TCL_DMA_UNK0__SIZE);
-       for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
+       so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11);
+       for (i = 0; i < 11; i++)
                so_data(so, chan->vram->handle);
-       so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
-                                    NV50TCL_DMA_UNK1__SIZE);
-       for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
+       so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0),
+                                    NV50TCL_DMA_COLOR__SIZE);
+       for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++)
                so_data(so, chan->vram->handle);
-       so_method(so, screen->tesla, 0x121c, 1);
+       so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1);
        so_data  (so, 1);
 
-       so_method(so, screen->tesla, 0x13bc, 1);
-       so_data  (so, 0x54);
+       /* activate all 32 lanes (threads) in a warp */
+       so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1);
+       so_data  (so, 0x2);
+       so_method(so, screen->tesla, 0x1400, 1);
+       so_data  (so, 0xf);
+
+       /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
+       for (i = 0; i < 3; ++i) {
+               so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1);
+               so_data  (so, 0x54);
+       }
+
        /* origin is top left (set to 1 for bottom left) */
-       so_method(so, screen->tesla, 0x13ac, 1);
+       so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
        so_data  (so, 0);
        so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
        so_data  (so, 8);
@@ -325,8 +385,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                return NULL;
        }
 
-       for (i = 0; i < 2; i++) {
-               ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4,
+       for (i = 0; i < 3; i++) {
+               ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4,
                                     &screen->constbuf_parm[i]);
                if (ret) {
                        nv50_screen_destroy(pscreen);
@@ -348,7 +408,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        //  B = buffer ID (maybe more than 1 byte)
        //  N = CB index used in shader instruction
        //  P = program type (0 = VP, 2 = GP, 3 = FP)
-       so_method(so, screen->tesla, 0x1694, 1);
+       so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
        so_data  (so, 0x000BBNP1);
        */
 
@@ -361,27 +421,51 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
        so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
        so_data  (so, 0x00000001 | (NV50_CB_PMISC << 12));
        so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
+       so_data  (so, 0x00000021 | (NV50_CB_PMISC << 12));
+       so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
        so_data  (so, 0x00000031 | (NV50_CB_PMISC << 12));
 
+       /* bind auxiliary constbuf to immediate data bo */
        so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
-       so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
-                 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
-       so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM |
-                 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+       so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4,
+                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+       so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4,
+                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+       so_data  (so, (NV50_CB_AUX << 16) | 0x00000200);
+       so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
+       so_data  (so, 0x00000201 | (NV50_CB_AUX << 12));
+       so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
+       so_data  (so, 0x00000221 | (NV50_CB_AUX << 12));
+
+       so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
+       so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0,
+                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+       so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0,
+                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
        so_data  (so, (NV50_CB_PVP << 16) | 0x00000800);
        so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
        so_data  (so, 0x00000101 | (NV50_CB_PVP << 12));
 
        so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
-       so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
-                 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
-       so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM |
-                 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+       so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0,
+                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+       so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0,
+                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+       so_data  (so, (NV50_CB_PGP << 16) | 0x00000800);
+       so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
+       so_data  (so, 0x00000121 | (NV50_CB_PGP << 12));
+
+       so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
+       so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0,
+                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+       so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0,
+                 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
        so_data  (so, (NV50_CB_PFP << 16) | 0x00000800);
        so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
        so_data  (so, 0x00000131 | (NV50_CB_PFP << 12));
 
-       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic);
+       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+                            &screen->tic);
        if (ret) {
                nv50_screen_destroy(pscreen);
                return NULL;
@@ -392,9 +476,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
        so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x000007ff);
+       so_data  (so, PIPE_SHADER_TYPES * 32 - 1);
 
-       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc);
+       ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+                            &screen->tsc);
        if (ret) {
                nv50_screen_destroy(pscreen);
                return NULL;
@@ -405,27 +490,31 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
                  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
        so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
                  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-       so_data  (so, 0x00000000);
+       so_data  (so, 0x00000000); /* ignored if LINKED_TSC (0x1234) = 1 */
 
 
        /* Vertex array limits - max them out */
        for (i = 0; i < 16; i++) {
-               so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2);
+               so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
                so_data  (so, 0x000000ff);
                so_data  (so, 0xffffffff);
        }
 
-       so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
+       so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2);
        so_data  (so, fui(0.0));
        so_data  (so, fui(1.0));
 
-       so_method(so, screen->tesla, 0x1234, 1);
+       /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
+       so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
        so_data  (so, 1);
 
        /* activate first scissor rectangle */
-       so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
+       so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
        so_data  (so, 1);
 
+       so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+       so_data  (so, 1); /* default edgeflag to TRUE */
+
        so_emit(chan, so);
        so_ref (so, &screen->static_init);
        so_ref (NULL, &so);