radeonsi: make si_is_format_supported static
[mesa.git] / src / gallium / drivers / radeonsi / si_pipe.c
index c2ca94339ac708071d8ee89b63776edd0a9d1d15..d83568150e1dde015b2fc63b7bac8331eefc56c2 100644 (file)
  */
 
 #include "si_pipe.h"
+#include "si_shader.h"
 #include "si_public.h"
 #include "sid.h"
 
 #include "radeon/radeon_llvm_emit.h"
 #include "radeon/radeon_uvd.h"
 #include "util/u_memory.h"
+#include "util/u_suballoc.h"
 #include "vl/vl_decoder.h"
 
+#define SI_LLVM_DEFAULT_FEATURES \
+       "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals"
+
 /*
  * pipe_context
  */
@@ -38,25 +43,30 @@ static void si_destroy_context(struct pipe_context *context)
        struct si_context *sctx = (struct si_context *)context;
        int i;
 
+       si_dec_framebuffer_counters(&sctx->framebuffer.state);
+
        si_release_all_descriptors(sctx);
 
+       if (sctx->ce_suballocator)
+               u_suballocator_destroy(sctx->ce_suballocator);
+
        pipe_resource_reference(&sctx->esgs_ring, NULL);
        pipe_resource_reference(&sctx->gsvs_ring, NULL);
        pipe_resource_reference(&sctx->tf_ring, NULL);
+       pipe_resource_reference(&sctx->tess_offchip_ring, NULL);
        pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
        r600_resource_reference(&sctx->border_color_buffer, NULL);
        free(sctx->border_color_table);
        r600_resource_reference(&sctx->scratch_buffer, NULL);
+       r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
        sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
        si_pm4_free_state(sctx, sctx->init_config, ~0);
        if (sctx->init_config_gs_rings)
                si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
-       for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
+       for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
                si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
 
-       if (sctx->pstipple_sampler_state)
-               sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
        if (sctx->fixed_func_tcs_shader.cso)
                sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
        if (sctx->custom_dsa_flush)
@@ -67,6 +77,8 @@ static void si_destroy_context(struct pipe_context *context)
                sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress);
        if (sctx->custom_blend_fastclear)
                sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear);
+       if (sctx->custom_blend_dcc_decompress)
+               sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_dcc_decompress);
        util_unreference_framebuffer_state(&sctx->framebuffer.state);
 
        if (sctx->blitter)
@@ -74,18 +86,12 @@ static void si_destroy_context(struct pipe_context *context)
 
        r600_common_context_cleanup(&sctx->b);
 
-#if HAVE_LLVM >= 0x0306
        LLVMDisposeTargetMachine(sctx->tm);
-#endif
 
        r600_resource_reference(&sctx->trace_buf, NULL);
        r600_resource_reference(&sctx->last_trace_buf, NULL);
-       free(sctx->last_ib);
-       if (sctx->last_bo_list) {
-               for (i = 0; i < sctx->last_bo_count; i++)
-                       pb_reference(&sctx->last_bo_list[i].buf, NULL);
-               free(sctx->last_bo_list);
-       }
+       radeon_clear_saved_cs(&sctx->last_gfx);
+
        FREE(sctx);
 }
 
@@ -104,9 +110,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        struct si_screen* sscreen = (struct si_screen *)screen;
        struct radeon_winsys *ws = sscreen->b.ws;
        LLVMTargetRef r600_target;
-#if HAVE_LLVM >= 0x0306
        const char *triple = "amdgcn--";
-#endif
        int shader, i;
 
        if (!sctx)
@@ -141,9 +145,30 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
                sctx->b.b.create_video_buffer = vl_video_buffer_create;
        }
 
-       sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
-                                      sctx, sscreen->b.trace_bo ?
-                                              sscreen->b.trace_bo->buf : NULL);
+       sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
+                                      si_context_gfx_flush, sctx);
+
+       if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) {
+               sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
+               if (!sctx->ce_ib)
+                       goto fail;
+
+               if (ws->cs_add_const_preamble_ib) {
+                       sctx->ce_preamble_ib =
+                                  ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);
+
+                       if (!sctx->ce_preamble_ib)
+                               goto fail;
+               }
+
+               sctx->ce_suballocator =
+                               u_suballocator_create(&sctx->b.b, 1024 * 1024,
+                                                     PIPE_BIND_CUSTOM,
+                                                     PIPE_USAGE_DEFAULT, false);
+               if (!sctx->ce_suballocator)
+                       goto fail;
+       }
+
        sctx->b.gfx.flush = si_context_gfx_flush;
 
        /* Border colors. */
@@ -169,6 +194,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        si_init_state_functions(sctx);
        si_init_shader_functions(sctx);
 
+       if (sctx->b.chip_class >= CIK)
+               cik_init_sdma_functions(sctx);
+       else
+               si_init_dma_functions(sctx);
+
        if (sscreen->b.debug_flags & DBG_FORCE_DMA)
                sctx->b.b.resource_copy_region = sctx->b.dma_copy;
 
@@ -201,25 +231,42 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
                /* Clear the NULL constant buffer, because loads should return zeros. */
                sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
-                                    sctx->null_const_buf.buffer->width0, 0, false);
+                                    sctx->null_const_buf.buffer->width0, 0,
+                                    R600_COHERENCY_SHADER);
        }
 
-       /* XXX: This is the maximum value allowed.  I'm not sure how to compute
-        * this for non-cs shaders.  Using the wrong value here can result in
-        * GPU lockups, but the maximum value seems to always work.
+       uint64_t max_threads_per_block;
+       screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+                                 PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
+                                 &max_threads_per_block);
+
+       /* The maximum number of scratch waves. Scratch space isn't divided
+        * evenly between CUs. The number is only a function of the number of CUs.
+        * We can decrease the constant to decrease the scratch buffer size.
+        *
+        * sctx->scratch_waves must be >= the maximum possible size of
+        * 1 threadgroup, so that the hw doesn't hang from being unable
+        * to start any.
+        *
+        * The recommended value is 4 per CU at most. Higher numbers don't
+        * bring much benefit, but they still occupy chip resources (think
+        * async compute). I've seen ~2% performance difference between 4 and 32.
         */
-       sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;
+       sctx->scratch_waves = MAX2(32 * sscreen->b.info.num_good_compute_units,
+                                  max_threads_per_block / 64);
 
-#if HAVE_LLVM >= 0x0306
        /* Initialize LLVM TargetMachine */
        r600_target = radeon_llvm_get_r600_target(triple);
        sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
                                           r600_get_llvm_processor_name(sscreen->b.family),
-                                          "+DumpCode,+vgpr-spilling",
+#if HAVE_LLVM >= 0x0308
+                                          sscreen->b.debug_flags & DBG_SI_SCHED ?
+                                               SI_LLVM_DEFAULT_FEATURES ",+si-scheduler" :
+#endif
+                                               SI_LLVM_DEFAULT_FEATURES,
                                           LLVMCodeGenLevelDefault,
                                           LLVMRelocDefault,
                                           LLVMCodeModelDefault);
-#endif
 
        return &sctx->b.b;
 fail:
@@ -301,6 +348,16 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_TGSI_TXQS:
        case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
        case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+       case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+       case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+       case PIPE_CAP_INVALIDATE_BUFFER:
+       case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+       case PIPE_CAP_QUERY_MEMORY_INFO:
+       case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+       case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+       case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+       case PIPE_CAP_GENERATE_MIPMAP:
+       case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
                return 1;
 
        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -325,13 +382,23 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
        case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
                return 4;
+       case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+               return HAVE_LLVM >= 0x0309 ? 4 : 0;
 
        case PIPE_CAP_GLSL_FEATURE_LEVEL:
-               return HAVE_LLVM >= 0x0307 ? 410 : 330;
+               if (pscreen->get_shader_param(pscreen, PIPE_SHADER_COMPUTE,
+                                             PIPE_SHADER_CAP_SUPPORTED_IRS) &
+                   (1 << PIPE_SHADER_IR_TGSI))
+                       return 430;
+               return HAVE_LLVM >= 0x0309 ? 420 :
+                      HAVE_LLVM >= 0x0307 ? 410 : 330;
 
        case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
                return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
 
+       case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+               return 0;
+
        /* Unsupported features. */
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
        case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
@@ -341,12 +408,14 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_VERTEXID_NOBASE:
        case PIPE_CAP_CLEAR_TEXTURE:
        case PIPE_CAP_DRAW_PARAMETERS:
-       case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
        case PIPE_CAP_MULTI_DRAW_INDIRECT:
        case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
-       case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
-       case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
-       case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+       case PIPE_CAP_STRING_MARKER:
+       case PIPE_CAP_QUERY_BUFFER_OBJECT:
+       case PIPE_CAP_CULL_DISTANCE:
+       case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+       case PIPE_CAP_TGSI_VOTE:
+       case PIPE_CAP_MAX_WINDOW_RECTANGLES:
                return 0;
 
        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
@@ -391,12 +460,12 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                return 8;
 
        case PIPE_CAP_MAX_VIEWPORTS:
-               return SI_MAX_VIEWPORTS;
+               return R600_MAX_VIEWPORTS;
 
        /* Timer queries, present when the clock frequency is non zero. */
        case PIPE_CAP_QUERY_TIMESTAMP:
        case PIPE_CAP_QUERY_TIME_ELAPSED:
-               return sscreen->b.info.r600_clock_crystal_freq != 0;
+               return sscreen->b.info.clock_crystal_freq != 0;
 
        case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
        case PIPE_CAP_MIN_TEXEL_OFFSET:
@@ -410,7 +479,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                return PIPE_ENDIAN_LITTLE;
 
        case PIPE_CAP_VENDOR_ID:
-               return 0x1002;
+               return ATI_VENDOR_ID;
        case PIPE_CAP_DEVICE_ID:
                return sscreen->b.info.pci_id;
        case PIPE_CAP_ACCELERATED:
@@ -419,12 +488,22 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                return sscreen->b.info.vram_size >> 20;
        case PIPE_CAP_UMA:
                return 0;
+       case PIPE_CAP_PCI_GROUP:
+               return sscreen->b.info.pci_domain;
+       case PIPE_CAP_PCI_BUS:
+               return sscreen->b.info.pci_bus;
+       case PIPE_CAP_PCI_DEVICE:
+               return sscreen->b.info.pci_dev;
+       case PIPE_CAP_PCI_FUNCTION:
+               return sscreen->b.info.pci_func;
        }
        return 0;
 }
 
 static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
 {
+       struct si_screen *sscreen = (struct si_screen *)pscreen;
+
        switch(shader)
        {
        case PIPE_SHADER_FRAGMENT:
@@ -434,24 +513,33 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
        case PIPE_SHADER_TESS_CTRL:
        case PIPE_SHADER_TESS_EVAL:
                /* LLVM 3.6.2 is required for tessellation because of bug fixes there */
-               if (HAVE_LLVM < 0x0306 ||
-                   (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 2))
+               if (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 2)
                        return 0;
                break;
        case PIPE_SHADER_COMPUTE:
                switch (param) {
                case PIPE_SHADER_CAP_PREFERRED_IR:
-#if HAVE_LLVM < 0x0306
-                       return PIPE_SHADER_IR_LLVM;
-#else
                        return PIPE_SHADER_IR_NATIVE;
-#endif
+
+               case PIPE_SHADER_CAP_SUPPORTED_IRS: {
+                       int ir = 1 << PIPE_SHADER_IR_NATIVE;
+
+                       /* Old kernels disallowed some register writes for SI
+                        * that are used for indirect dispatches. */
+                       if (HAVE_LLVM >= 0x309 && (sscreen->b.chip_class >= CIK ||
+                                                  sscreen->b.info.drm_major == 3 ||
+                                                  (sscreen->b.info.drm_major == 2 &&
+                                                   sscreen->b.info.drm_minor >= 45)))
+                               ir |= 1 << PIPE_SHADER_IR_TGSI;
+
+                       return ir;
+               }
                case PIPE_SHADER_CAP_DOUBLES:
                        return HAVE_LLVM >= 0x0307;
 
                case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
                        uint64_t max_const_buffer_size;
-                       pscreen->get_compute_param(pscreen,
+                       pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
                                PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
                                &max_const_buffer_size);
                        return max_const_buffer_size;
@@ -484,7 +572,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
        case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
                return 4096 * sizeof(float[4]); /* actually only memory limits this */
        case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-               return SI_NUM_USER_CONST_BUFFERS;
+               return SI_NUM_CONST_BUFFERS;
        case PIPE_SHADER_CAP_MAX_PREDS:
                return 0; /* FIXME */
        case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -506,9 +594,11 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
                return 0;
        case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
        case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
-               return 16;
+               return SI_NUM_SAMPLERS;
        case PIPE_SHADER_CAP_PREFERRED_IR:
                return PIPE_SHADER_IR_TGSI;
+       case PIPE_SHADER_CAP_SUPPORTED_IRS:
+               return 0;
        case PIPE_SHADER_CAP_DOUBLES:
                return HAVE_LLVM >= 0x0307;
        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
@@ -520,7 +610,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
                return 32;
        case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
-               return 0;
+               return HAVE_LLVM >= 0x0309 ? SI_NUM_SHADER_BUFFERS : 0;
+       case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+               return HAVE_LLVM >= 0x0309 ? SI_NUM_IMAGES : 0;
        }
        return 0;
 }
@@ -528,6 +620,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
 static void si_destroy_screen(struct pipe_screen* pscreen)
 {
        struct si_screen *sscreen = (struct si_screen *)pscreen;
+       struct si_shader_part *parts[] = {
+               sscreen->vs_prologs,
+               sscreen->vs_epilogs,
+               sscreen->tcs_epilogs,
+               sscreen->ps_prologs,
+               sscreen->ps_epilogs
+       };
+       unsigned i;
 
        if (!sscreen)
                return;
@@ -535,58 +635,19 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
        if (!sscreen->b.ws->unref(sscreen->b.ws))
                return;
 
-       r600_destroy_common_screen(&sscreen->b);
-}
-
-#define SI_TILE_MODE_COLOR_2D_8BPP  14
-
-/* Initialize pipe config. This is especially important for GPUs
- * with 16 pipes and more where it's initialized incorrectly by
- * the TILING_CONFIG ioctl. */
-static bool si_initialize_pipe_config(struct si_screen *sscreen)
-{
-       unsigned mode2d;
-
-       /* This is okay, because there can be no 2D tiling without
-        * the tile mode array, so we won't need the pipe config.
-        * Return "success".
-        */
-       if (!sscreen->b.info.si_tile_mode_array_valid)
-               return true;
-
-       /* The same index is used for the 2D mode on CIK too. */
-       mode2d = sscreen->b.info.si_tile_mode_array[SI_TILE_MODE_COLOR_2D_8BPP];
+       /* Free shader parts. */
+       for (i = 0; i < ARRAY_SIZE(parts); i++) {
+               while (parts[i]) {
+                       struct si_shader_part *part = parts[i];
 
-       switch (G_009910_PIPE_CONFIG(mode2d)) {
-       case V_02803C_ADDR_SURF_P2:
-               sscreen->b.tiling_info.num_channels = 2;
-               break;
-       case V_02803C_X_ADDR_SURF_P4_8X16:
-       case V_02803C_X_ADDR_SURF_P4_16X16:
-       case V_02803C_X_ADDR_SURF_P4_16X32:
-       case V_02803C_X_ADDR_SURF_P4_32X32:
-               sscreen->b.tiling_info.num_channels = 4;
-               break;
-       case V_02803C_X_ADDR_SURF_P8_16X16_8X16:
-       case V_02803C_X_ADDR_SURF_P8_16X32_8X16:
-       case V_02803C_X_ADDR_SURF_P8_32X32_8X16:
-       case V_02803C_X_ADDR_SURF_P8_16X32_16X16:
-       case V_02803C_X_ADDR_SURF_P8_32X32_16X16:
-       case V_02803C_X_ADDR_SURF_P8_32X32_16X32:
-       case V_02803C_X_ADDR_SURF_P8_32X64_32X32:
-               sscreen->b.tiling_info.num_channels = 8;
-               break;
-       case V_02803C_X_ADDR_SURF_P16_32X32_8X16:
-       case V_02803C_X_ADDR_SURF_P16_32X32_16X16:
-               sscreen->b.tiling_info.num_channels = 16;
-               break;
-       default:
-               assert(0);
-               fprintf(stderr, "radeonsi: Unknown pipe config %i.\n",
-                       G_009910_PIPE_CONFIG(mode2d));
-               return false;
+                       parts[i] = part->next;
+                       radeon_shader_binary_clean(&part->binary);
+                       FREE(part);
+               }
        }
-       return true;
+       pipe_mutex_destroy(sscreen->shader_parts_mutex);
+       si_destroy_shader_cache(sscreen);
+       r600_destroy_common_screen(&sscreen->b);
 }
 
 static bool si_init_gs_info(struct si_screen *sscreen)
@@ -609,6 +670,8 @@ static bool si_init_gs_info(struct si_screen *sscreen)
        case CHIP_HAWAII:
        case CHIP_TONGA:
        case CHIP_FIJI:
+       case CHIP_POLARIS10:
+       case CHIP_POLARIS11:
                sscreen->gs_table_depth = 32;
                return true;
        default:
@@ -629,27 +692,35 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
        sscreen->b.b.destroy = si_destroy_screen;
        sscreen->b.b.get_param = si_get_param;
        sscreen->b.b.get_shader_param = si_get_shader_param;
-       sscreen->b.b.is_format_supported = si_is_format_supported;
        sscreen->b.b.resource_create = r600_resource_create_common;
 
+       si_init_screen_state_functions(sscreen);
+
        if (!r600_common_screen_init(&sscreen->b, ws) ||
-           !si_initialize_pipe_config(sscreen) ||
-           !si_init_gs_info(sscreen)) {
+           !si_init_gs_info(sscreen) ||
+           !si_init_shader_cache(sscreen)) {
                FREE(sscreen);
                return NULL;
        }
 
-       if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", FALSE))
+       if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
                si_init_perfcounters(sscreen);
 
        sscreen->b.has_cp_dma = true;
        sscreen->b.has_streamout = true;
+       pipe_mutex_init(sscreen->shader_parts_mutex);
+       sscreen->use_monolithic_shaders =
+               HAVE_LLVM < 0x0308 ||
+               (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
 
-       if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
+       if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
                sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
 
        /* Create the auxiliary context. This must be done last. */
        sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0);
 
+       if (sscreen->b.debug_flags & DBG_TEST_DMA)
+               r600_test_dma(&sscreen->b);
+
        return &sscreen->b.b;
 }