#include "util/u_suballoc.h"
#include "vl/vl_decoder.h"
+#define SI_LLVM_DEFAULT_FEATURES \
+ "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals"
+
/*
* pipe_context
*/
r600_resource_reference(&sctx->trace_buf, NULL);
r600_resource_reference(&sctx->last_trace_buf, NULL);
- free(sctx->last_ib);
- if (sctx->last_bo_list) {
- for (i = 0; i < sctx->last_bo_count; i++)
- pb_reference(&sctx->last_bo_list[i].buf, NULL);
- free(sctx->last_bo_list);
- }
+ radeon_clear_saved_cs(&sctx->last_gfx);
+
FREE(sctx);
}
sctx->ce_suballocator =
u_suballocator_create(&sctx->b.b, 1024 * 1024,
- 64, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT, FALSE);
+ PIPE_BIND_CUSTOM,
+ PIPE_USAGE_DEFAULT, false);
if (!sctx->ce_suballocator)
goto fail;
}
R600_COHERENCY_SHADER);
}
- /* XXX: This is the maximum value allowed. I'm not sure how to compute
- * this for non-cs shaders. Using the wrong value here can result in
- * GPU lockups, but the maximum value seems to always work.
+ uint64_t max_threads_per_block;
+ screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+ PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
+ &max_threads_per_block);
+
+ /* The maximum number of scratch waves. Scratch space isn't divided
+ * evenly between CUs. The number is only a function of the number of CUs.
+ * We can decrease the constant to decrease the scratch buffer size.
+ *
+ * sctx->scratch_waves must be >= the maximum possible size of
+ * 1 threadgroup, so that the hw doesn't hang from being unable
+ * to start any.
+ *
+ * The recommended value is 4 per CU at most. Higher numbers don't
+ * bring much benefit, but they still occupy chip resources (think
+ * async compute). I've seen ~2% performance difference between 4 and 32.
*/
- sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;
+ sctx->scratch_waves = MAX2(32 * sscreen->b.info.num_good_compute_units,
+ max_threads_per_block / 64);
/* Initialize LLVM TargetMachine */
r600_target = radeon_llvm_get_r600_target(triple);
r600_get_llvm_processor_name(sscreen->b.family),
#if HAVE_LLVM >= 0x0308
sscreen->b.debug_flags & DBG_SI_SCHED ?
- "+DumpCode,+vgpr-spilling,+si-scheduler" :
+ SI_LLVM_DEFAULT_FEATURES ",+si-scheduler" :
#endif
- "+DumpCode,+vgpr-spilling",
+ SI_LLVM_DEFAULT_FEATURES,
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+ case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
- case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_CULL_DISTANCE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+ case PIPE_CAP_TGSI_VOTE:
+ case PIPE_CAP_MAX_WINDOW_RECTANGLES:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
sscreen->b.b.destroy = si_destroy_screen;
sscreen->b.b.get_param = si_get_param;
sscreen->b.b.get_shader_param = si_get_shader_param;
- sscreen->b.b.is_format_supported = si_is_format_supported;
sscreen->b.b.resource_create = r600_resource_create_common;
si_init_screen_state_functions(sscreen);
return NULL;
}
- if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", FALSE))
+ if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
si_init_perfcounters(sscreen);
sscreen->b.has_cp_dma = true;
HAVE_LLVM < 0x0308 ||
(sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
- if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
+ if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
/* Create the auxiliary context. This must be done last. */