radeonsi: emit GS_OUT_PRIM_TYPE only if it changes
[mesa.git] / src / gallium / drivers / radeonsi / si_pipe.c
index 14dfd30bfb07e62fd24be1565c417edb47b523c9..8fc5c19dfd766d076fe03a778eeddf32f2b20079 100644 (file)
 
 #include "si_pipe.h"
 #include "si_public.h"
+#include "sid.h"
 
 #include "radeon/radeon_uvd.h"
-#include "util/u_blitter.h"
 #include "util/u_memory.h"
-#include "util/u_simple_shaders.h"
 #include "vl/vl_decoder.h"
 
 /*
  * pipe_context
  */
-void si_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
-             unsigned flags)
-{
-       struct si_context *sctx = (struct si_context *)ctx;
-       struct pipe_query *render_cond = NULL;
-       boolean render_cond_cond = FALSE;
-       unsigned render_cond_mode = 0;
-
-       if (fence) {
-               *fence = sctx->b.ws->cs_create_fence(sctx->b.rings.gfx.cs);
-       }
-
-       /* Disable render condition. */
-       if (sctx->b.current_render_cond) {
-               render_cond = sctx->b.current_render_cond;
-               render_cond_cond = sctx->b.current_render_cond_cond;
-               render_cond_mode = sctx->b.current_render_cond_mode;
-               ctx->render_condition(ctx, NULL, FALSE, 0);
-       }
-
-       si_context_flush(sctx, flags);
-
-       /* Re-enable render condition. */
-       if (render_cond) {
-               ctx->render_condition(ctx, render_cond, render_cond_cond, render_cond_mode);
-       }
-}
-
-static void si_flush_from_st(struct pipe_context *ctx,
-                            struct pipe_fence_handle **fence,
-                            unsigned flags)
-{
-       si_flush(ctx, fence,
-                flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0);
-}
-
-static void si_flush_from_winsys(void *ctx, unsigned flags)
-{
-       si_flush((struct pipe_context*)ctx, NULL, flags);
-}
-
 static void si_destroy_context(struct pipe_context *context)
 {
        struct si_context *sctx = (struct si_context *)context;
 
        si_release_all_descriptors(sctx);
 
+       pipe_resource_reference(&sctx->esgs_ring, NULL);
+       pipe_resource_reference(&sctx->gsvs_ring, NULL);
        pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
        r600_resource_reference(&sctx->border_color_table, NULL);
 
-       if (sctx->gs_on) {
-               si_pm4_free_state(sctx, sctx->gs_on, 0);
-       }
-       if (sctx->gs_off) {
-               si_pm4_free_state(sctx, sctx->gs_off, 0);
-       }
-       if (sctx->gs_rings) {
-               si_pm4_free_state(sctx, sctx->gs_rings, 0);
-       }
+       si_pm4_delete_state(sctx, gs_rings, sctx->gs_rings);
+       si_pm4_delete_state(sctx, gs_onoff, sctx->gs_on);
+       si_pm4_delete_state(sctx, gs_onoff, sctx->gs_off);
 
        if (sctx->dummy_pixel_shader) {
                sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
        }
-       for (int i = 0; i < 8; i++) {
-               sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush_depth_stencil[i]);
-               sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush_depth[i]);
-               sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush_stencil[i]);
-       }
-       sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush_inplace);
+       sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
        sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_resolve);
        sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress);
-       util_unreference_framebuffer_state(&sctx->framebuffer);
+       sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear);
+       util_unreference_framebuffer_state(&sctx->framebuffer.state);
 
        util_blitter_destroy(sctx->blitter);
 
+       si_pm4_cleanup(sctx);
+
        r600_common_context_cleanup(&sctx->b);
        FREE(sctx);
 }
@@ -116,6 +68,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 {
        struct si_context *sctx = CALLOC_STRUCT(si_context);
        struct si_screen* sscreen = (struct si_screen *)screen;
+       struct radeon_winsys *ws = sscreen->b.ws;
        int shader, i;
 
        if (sctx == NULL)
@@ -124,7 +77,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
        sctx->b.b.screen = screen; /* this must be set first */
        sctx->b.b.priv = priv;
        sctx->b.b.destroy = si_destroy_context;
-       sctx->b.b.flush = si_flush_from_st;
        sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
 
        if (!r600_common_context_init(&sctx->b, &sscreen->b))
@@ -141,21 +93,28 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
                sctx->b.b.create_video_buffer = vl_video_buffer_create;
        }
 
-       sctx->b.rings.gfx.cs = sctx->b.ws->cs_create(sctx->b.ws, RING_GFX, NULL);
-       sctx->b.rings.gfx.flush = si_flush_from_winsys;
+       sctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, si_context_gfx_flush,
+                                            sctx, sscreen->b.trace_bo ?
+                                               sscreen->b.trace_bo->cs_buf : NULL);
+       sctx->b.rings.gfx.flush = si_context_gfx_flush;
 
        si_init_all_descriptors(sctx);
 
        /* Initialize cache_flush. */
        sctx->cache_flush = si_atom_cache_flush;
-       sctx->atoms.cache_flush = &sctx->cache_flush;
+       sctx->atoms.s.cache_flush = &sctx->cache_flush;
 
-       sctx->atoms.streamout_begin = &sctx->b.streamout.begin_atom;
+       sctx->msaa_config = si_atom_msaa_config;
+       sctx->atoms.s.msaa_config = &sctx->msaa_config;
+
+       sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
+       sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;
 
        switch (sctx->b.chip_class) {
        case SI:
        case CIK:
                si_init_state_functions(sctx);
+               si_init_shader_functions(sctx);
                si_init_config(sctx);
                break;
        default:
@@ -163,17 +122,13 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
                goto fail;
        }
 
-       sctx->b.ws->cs_set_flush_callback(sctx->b.rings.gfx.cs, si_flush_from_winsys, sctx);
+       if (sscreen->b.debug_flags & DBG_FORCE_DMA)
+               sctx->b.b.resource_copy_region = sctx->b.dma_copy;
 
        sctx->blitter = util_blitter_create(&sctx->b.b);
        if (sctx->blitter == NULL)
                goto fail;
-
-       sctx->dummy_pixel_shader =
-               util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
-                                                    TGSI_SEMANTIC_GENERIC,
-                                                    TGSI_INTERPOLATE_CONSTANT);
-       sctx->b.b.bind_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
+       sctx->blitter->draw_rectangle = r600_draw_rectangle;
 
        /* these must be last */
        si_begin_new_cs(sctx);
@@ -183,11 +138,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
         * with a NULL buffer). We need to use a dummy buffer instead. */
        if (sctx->b.chip_class == CIK) {
                sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
-                                                                PIPE_USAGE_STATIC, 16);
+                                                                PIPE_USAGE_DEFAULT, 16);
                sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
 
                for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
-                       for (i = 0; i < NUM_CONST_BUFFERS; i++) {
+                       for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
                                sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
                                                              &sctx->null_const_buf);
                        }
@@ -229,6 +184,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
        case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
        case PIPE_CAP_SM3:
        case PIPE_CAP_SEAMLESS_CUBE_MAP:
        case PIPE_CAP_PRIMITIVE_RESTART:
@@ -250,41 +206,53 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_TGSI_INSTANCEID:
        case PIPE_CAP_COMPUTE:
        case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
-        case PIPE_CAP_TGSI_VS_LAYER:
+        case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
        case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+       case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+       case PIPE_CAP_CUBE_MAP_ARRAY:
+       case PIPE_CAP_SAMPLE_SHADING:
+       case PIPE_CAP_DRAW_INDIRECT:
+       case PIPE_CAP_CLIP_HALFZ:
+       case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
                return 1;
 
        case PIPE_CAP_TEXTURE_MULTISAMPLE:
                /* 2D tiling on CIK is supported since DRM 2.35.0 */
-               return HAVE_LLVM >= 0x0304 && (sscreen->b.chip_class < CIK ||
-                                              sscreen->b.info.drm_minor >= 35);
-
-       case PIPE_CAP_TGSI_TEXCOORD:
-               return 0;
+               return sscreen->b.chip_class < CIK ||
+                      sscreen->b.info.drm_minor >= 35;
 
         case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
-                return 64;
+                return R600_MAP_BUFFER_ALIGNMENT;
 
        case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
-               return 256;
+       case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+               return 4;
 
        case PIPE_CAP_GLSL_FEATURE_LEVEL:
-               return HAVE_LLVM >= 0x0305 ? 330 : 140;
+               return 330;
 
-       case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-               return 1;
        case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
                return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
 
+       case PIPE_CAP_TEXTURE_QUERY_LOD:
+       case PIPE_CAP_TEXTURE_GATHER_SM5:
+               return HAVE_LLVM >= 0x0305;
+       case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+               return HAVE_LLVM >= 0x0305 ? 4 : 0;
+
        /* Unsupported features. */
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
-       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
        case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
        case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
        case PIPE_CAP_USER_VERTEX_BUFFERS:
-       case PIPE_CAP_CUBE_MAP_ARRAY:
+       case PIPE_CAP_TGSI_TEXCOORD:
+       case PIPE_CAP_FAKE_SW_MSAA:
+       case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+       case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+       case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+       case PIPE_CAP_SAMPLER_VIEW_TARGET:
                return 0;
 
        case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
@@ -299,13 +267,27 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
                return sscreen->b.has_streamout ? 32*4 : 0;
 
+       /* Geometry shader output. */
+       case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+               return 1024;
+       case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+               return 4095;
+       case PIPE_CAP_MAX_VERTEX_STREAMS:
+               return 1;
+
+       case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+               return 2048;
+
        /* Texturing. */
        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
-       case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
-                       return 15;
+               return 15; /* 16384 */
+       case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+               /* textures support 8192, but layered rendering supports 2048 */
+               return 12;
        case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-               return 16384;
+               /* textures support 8192, but layered rendering supports 2048 */
+               return 2048;
 
        /* Render targets. */
        case PIPE_CAP_MAX_RENDER_TARGETS:
@@ -319,13 +301,27 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_QUERY_TIME_ELAPSED:
                return sscreen->b.info.r600_clock_crystal_freq != 0;
 
+       case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
        case PIPE_CAP_MIN_TEXEL_OFFSET:
-               return -8;
+               return -32;
 
+       case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
        case PIPE_CAP_MAX_TEXEL_OFFSET:
-               return 7;
+               return 31;
+
        case PIPE_CAP_ENDIANNESS:
                return PIPE_ENDIAN_LITTLE;
+
+       case PIPE_CAP_VENDOR_ID:
+               return 0x1002;
+       case PIPE_CAP_DEVICE_ID:
+               return sscreen->b.info.pci_id;
+       case PIPE_CAP_ACCELERATED:
+               return 1;
+       case PIPE_CAP_VIDEO_MEMORY:
+               return sscreen->b.info.vram_size >> 20;
+       case PIPE_CAP_UMA:
+               return 0;
        }
        return 0;
 }
@@ -336,16 +332,26 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
        {
        case PIPE_SHADER_FRAGMENT:
        case PIPE_SHADER_VERTEX:
-               break;
        case PIPE_SHADER_GEOMETRY:
-#if HAVE_LLVM < 0x0305
-               return 0;
-#endif
                break;
        case PIPE_SHADER_COMPUTE:
                switch (param) {
                case PIPE_SHADER_CAP_PREFERRED_IR:
+#if HAVE_LLVM < 0x0306
                        return PIPE_SHADER_IR_LLVM;
+#else
+                       return PIPE_SHADER_IR_NATIVE;
+#endif
+               case PIPE_SHADER_CAP_DOUBLES:
+                       return 0; /* XXX: Enable doubles once the compiler can
+                                    handle them. */
+               case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
+                       uint64_t max_const_buffer_size;
+                       pscreen->get_compute_param(pscreen,
+                               PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+                               &max_const_buffer_size);
+                       return max_const_buffer_size;
+               }
                default:
                        return 0;
                }
@@ -363,16 +369,15 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
        case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
                return 32;
        case PIPE_SHADER_CAP_MAX_INPUTS:
-               return 32;
+               return shader == PIPE_SHADER_VERTEX ? SI_NUM_VERTEX_BUFFERS : 32;
+       case PIPE_SHADER_CAP_MAX_OUTPUTS:
+               return shader == PIPE_SHADER_FRAGMENT ? 8 : 32;
        case PIPE_SHADER_CAP_MAX_TEMPS:
                return 256; /* Max native temporaries. */
-       case PIPE_SHADER_CAP_MAX_ADDRS:
-               /* FIXME Isn't this equal to TEMPS? */
-               return 1; /* Max native address registers */
-       case PIPE_SHADER_CAP_MAX_CONSTS:
-               return 4096; /* actually only memory limits this */
+       case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+               return 4096 * sizeof(float[4]); /* actually only memory limits this */
        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-               return NUM_PIPE_CONST_BUFFERS;
+               return SI_NUM_USER_CONST_BUFFERS;
        case PIPE_SHADER_CAP_MAX_PREDS:
                return 0; /* FIXME */
        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -397,6 +402,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
                return 16;
        case PIPE_SHADER_CAP_PREFERRED_IR:
                return PIPE_SHADER_IR_TGSI;
+       case PIPE_SHADER_CAP_DOUBLES:
+               return 0;
        }
        return 0;
 }
@@ -408,12 +415,63 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
        if (sscreen == NULL)
                return;
 
-       if (!radeon_winsys_unref(sscreen->b.ws))
+       if (!sscreen->b.ws->unref(sscreen->b.ws))
                return;
 
        r600_destroy_common_screen(&sscreen->b);
 }
 
+#define SI_TILE_MODE_COLOR_2D_8BPP  14
+
+/* Initialize pipe config. This is especially important for GPUs
+ * with 16 pipes and more where it's initialized incorrectly by
+ * the TILING_CONFIG ioctl.
+ *
+ * Decodes the PIPE_CONFIG field of the 2D 8bpp color tile mode and
+ * stores the matching channel count (2, 4, 8 or 16) in
+ * sscreen->b.tiling_info.num_channels.
+ *
+ * Returns true on success, and also when the tile mode array is not
+ * valid (nothing is written in that case). Returns false when the
+ * pipe config value is not recognized. */
+static bool si_initialize_pipe_config(struct si_screen *sscreen)
+{
+       unsigned mode2d;
+
+       /* This is okay, because there can be no 2D tiling without
+        * the tile mode array, so we won't need the pipe config.
+        * Return "success".
+        */
+       if (!sscreen->b.info.si_tile_mode_array_valid)
+               return true;
+
+       /* The same index is used for the 2D mode on CIK too. */
+       mode2d = sscreen->b.info.si_tile_mode_array[SI_TILE_MODE_COLOR_2D_8BPP];
+
+       switch (G_009910_PIPE_CONFIG(mode2d)) {
+       case V_02803C_ADDR_SURF_P2:
+               sscreen->b.tiling_info.num_channels = 2;
+               break;
+       case V_02803C_X_ADDR_SURF_P4_8X16:
+       case V_02803C_X_ADDR_SURF_P4_16X16:
+       case V_02803C_X_ADDR_SURF_P4_16X32:
+       case V_02803C_X_ADDR_SURF_P4_32X32:
+               sscreen->b.tiling_info.num_channels = 4;
+               break;
+       case V_02803C_X_ADDR_SURF_P8_16X16_8X16:
+       case V_02803C_X_ADDR_SURF_P8_16X32_8X16:
+       case V_02803C_X_ADDR_SURF_P8_32X32_8X16:
+       case V_02803C_X_ADDR_SURF_P8_16X32_16X16:
+       case V_02803C_X_ADDR_SURF_P8_32X32_16X16:
+       case V_02803C_X_ADDR_SURF_P8_32X32_16X32:
+       case V_02803C_X_ADDR_SURF_P8_32X64_32X32:
+               sscreen->b.tiling_info.num_channels = 8;
+               break;
+       case V_02803C_X_ADDR_SURF_P16_32X32_8X16:
+       case V_02803C_X_ADDR_SURF_P16_32X32_16X16:
+               sscreen->b.tiling_info.num_channels = 16;
+               break;
+       default:
+               /* Report the offending value before asserting: with the
+                * assert first, debug builds abort before the message
+                * is ever printed. Release builds are unaffected. */
+               fprintf(stderr, "radeonsi: Unknown pipe config %i.\n",
+                       G_009910_PIPE_CONFIG(mode2d));
+               assert(0);
+               return false;
+       }
+       return true;
+}
+
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 {
        struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
@@ -429,13 +487,14 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
        sscreen->b.b.is_format_supported = si_is_format_supported;
        sscreen->b.b.resource_create = r600_resource_create_common;
 
-       if (!r600_common_screen_init(&sscreen->b, ws)) {
+       if (!r600_common_screen_init(&sscreen->b, ws) ||
+           !si_initialize_pipe_config(sscreen)) {
                FREE(sscreen);
                return NULL;
        }
 
        sscreen->b.has_cp_dma = true;
-       sscreen->b.has_streamout = HAVE_LLVM >= 0x0304;
+       sscreen->b.has_streamout = true;
 
        if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
                sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;