#include "util/u_suballoc.h"
#include "amd/common/sid.h"
-#define SI_MAX_STREAMS 4
-
static const struct si_query_ops query_hw_ops;
struct si_hw_query_params {
struct pipe_fence_handle *fence;
};
+/* Destroy a software (CPU-side) query.
+ *
+ * The parameter changes from si_screen to si_context so that every
+ * si_query_ops::destroy implementation shares one signature; the screen
+ * remains reachable through sctx->b.screen for the fence release below.
+ */
-static void si_query_sw_destroy(struct si_screen *sscreen,
+static void si_query_sw_destroy(struct si_context *sctx,
struct si_query *squery)
{
struct si_query_sw *query = (struct si_query_sw *)squery;
+/* Release the fence reference held by this query, if any. */
- sscreen->b.fence_reference(&sscreen->b, &query->fence, NULL);
+ sctx->b.screen->fence_reference(sctx->b.screen, &query->fence, NULL);
FREE(query);
}
case SI_QUERY_NUM_SHADERS_CREATED:
query->begin_result = p_atomic_read(&sctx->screen->num_shaders_created);
break;
- case SI_QUERY_NUM_SHADER_CACHE_HITS:
- query->begin_result =
- p_atomic_read(&sctx->screen->num_shader_cache_hits);
+ case SI_QUERY_LIVE_SHADER_CACHE_HITS:
+ query->begin_result = sctx->screen->live_shader_cache.hits;
+ break;
+ case SI_QUERY_LIVE_SHADER_CACHE_MISSES:
+ query->begin_result = sctx->screen->live_shader_cache.misses;
+ break;
+ case SI_QUERY_MEMORY_SHADER_CACHE_HITS:
+ query->begin_result = sctx->screen->num_memory_shader_cache_hits;
+ break;
+ case SI_QUERY_MEMORY_SHADER_CACHE_MISSES:
+ query->begin_result = sctx->screen->num_memory_shader_cache_misses;
+ break;
+ case SI_QUERY_DISK_SHADER_CACHE_HITS:
+ query->begin_result = sctx->screen->num_disk_shader_cache_hits;
+ break;
+ case SI_QUERY_DISK_SHADER_CACHE_MISSES:
+ query->begin_result = sctx->screen->num_disk_shader_cache_misses;
+ break;
+ case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
+ query->begin_result = sctx->compute_num_verts_accepted;
+ break;
+ case SI_QUERY_PD_NUM_PRIMS_REJECTED:
+ query->begin_result = sctx->compute_num_verts_rejected;
+ break;
+ case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
+ query->begin_result = sctx->compute_num_verts_ineligible;
break;
case SI_QUERY_GPIN_ASIC_ID:
case SI_QUERY_GPIN_NUM_SIMD:
case SI_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
query->end_result = sctx->last_tex_ps_draw_ratio;
break;
- case SI_QUERY_NUM_SHADER_CACHE_HITS:
- query->end_result =
- p_atomic_read(&sctx->screen->num_shader_cache_hits);
+ case SI_QUERY_LIVE_SHADER_CACHE_HITS:
+ query->end_result = sctx->screen->live_shader_cache.hits;
+ break;
+ case SI_QUERY_LIVE_SHADER_CACHE_MISSES:
+ query->end_result = sctx->screen->live_shader_cache.misses;
+ break;
+ case SI_QUERY_MEMORY_SHADER_CACHE_HITS:
+ query->end_result = sctx->screen->num_memory_shader_cache_hits;
+ break;
+ case SI_QUERY_MEMORY_SHADER_CACHE_MISSES:
+ query->end_result = sctx->screen->num_memory_shader_cache_misses;
+ break;
+ case SI_QUERY_DISK_SHADER_CACHE_HITS:
+ query->end_result = sctx->screen->num_disk_shader_cache_hits;
+ break;
+ case SI_QUERY_DISK_SHADER_CACHE_MISSES:
+ query->end_result = sctx->screen->num_disk_shader_cache_misses;
+ break;
+ case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
+ query->end_result = sctx->compute_num_verts_accepted;
+ break;
+ case SI_QUERY_PD_NUM_PRIMS_REJECTED:
+ query->end_result = sctx->compute_num_verts_rejected;
+ break;
+ case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
+ query->end_result = sctx->compute_num_verts_ineligible;
break;
case SI_QUERY_GPIN_ASIC_ID:
case SI_QUERY_GPIN_NUM_SIMD:
result->u64 = (query->end_result - query->begin_result) * 100 /
(query->end_time - query->begin_time);
return true;
+ case SI_QUERY_PD_NUM_PRIMS_ACCEPTED:
+ case SI_QUERY_PD_NUM_PRIMS_REJECTED:
+ case SI_QUERY_PD_NUM_PRIMS_INELIGIBLE:
+ result->u64 = ((unsigned)query->end_result -
+ (unsigned)query->begin_result) / 3;
+ return true;
case SI_QUERY_GPIN_ASIC_ID:
result->u32 = 0;
return true;
}
buffer->results_end = 0;
+ if (!buffer->buf)
+ return;
+
/* Discard even the oldest buffer if it can't be mapped without a stall. */
- if (buffer->buf &&
- (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) ||
- !sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE))) {
+ if (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) ||
+ !sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE)) {
si_resource_reference(&buffer->buf, NULL);
+ } else {
+ buffer->unprepared = true;
}
}
bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*),
unsigned size)
{
- if (buffer->buf && buffer->results_end + size >= buffer->buf->b.b.width0)
- return true;
+ bool unprepared = buffer->unprepared;
+ buffer->unprepared = false;
+
+ if (!buffer->buf || buffer->results_end + size > buffer->buf->b.b.width0) {
+ if (buffer->buf) {
+ struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
+ memcpy(qbuf, buffer, sizeof(*qbuf));
+ buffer->previous = qbuf;
+ }
+ buffer->results_end = 0;
- if (buffer->buf) {
- struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
- memcpy(qbuf, buffer, sizeof(*qbuf));
- buffer->previous = qbuf;
+ /* Queries are normally read by the CPU after
+ * being written by the GPU, hence staging is probably a good
+ * usage pattern.
+ */
+ struct si_screen *screen = sctx->screen;
+ unsigned buf_size = MAX2(size, screen->info.min_alloc_size);
+ buffer->buf = si_resource(
+ pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
+ if (unlikely(!buffer->buf))
+ return false;
+ unprepared = true;
}
- buffer->results_end = 0;
-
- /* Queries are normally read by the CPU after
- * being written by the gpu, hence staging is probably a good
- * usage pattern.
- */
- struct si_screen *screen = sctx->screen;
- unsigned buf_size = MAX2(size, screen->info.min_alloc_size);
- buffer->buf = si_resource(
- pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
- if (unlikely(!buffer->buf))
- return false;
-
- if (prepare_buffer) {
+ if (unprepared && prepare_buffer) {
if (unlikely(!prepare_buffer(sctx, buffer))) {
si_resource_reference(&buffer->buf, NULL);
return false;
}
+/* Destroy a hardware query: free its result buffer chain and the
+ * predication workaround buffer (if one was allocated), then the query
+ * object itself. Takes a context (not a screen) so all query destroy
+ * callbacks share one signature; only the screen is actually needed
+ * for the buffer teardown.
+ */
-void si_query_hw_destroy(struct si_screen *sscreen,
- struct si_query *squery)
+void si_query_hw_destroy(struct si_context *sctx, struct si_query *squery)
{
struct si_query_hw *query = (struct si_query_hw *)squery;
- si_query_buffer_destroy(sscreen, &query->buffer);
+ si_query_buffer_destroy(sctx->screen, &query->buffer);
si_resource_reference(&query->workaround_buf, NULL);
FREE(squery);
}
emit_sample_streamout(cs, va + 32 * stream, stream);
break;
case PIPE_QUERY_TIME_ELAPSED:
- si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
EOP_DATA_SEL_TIMESTAMP, NULL, va,
0, query->b.type);
si_update_occlusion_query_state(sctx, query->b.type, 1);
si_update_prims_generated_query_state(sctx, query->b.type, 1);
+ if (query->b.type == PIPE_QUERY_PIPELINE_STATISTICS)
+ sctx->num_pipeline_stat_queries++;
+
if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA)
si_need_gfx_cs_space(sctx);
va += 8;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
- si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
EOP_DATA_SEL_TIMESTAMP, NULL, va,
0, query->b.type);
RADEON_PRIO_QUERY);
if (fence_va) {
- si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
EOP_DATA_SEL_VALUE_32BIT,
query->buffer.buf, fence_va, 0x80000000,
si_update_occlusion_query_state(sctx, query->b.type, -1);
si_update_prims_generated_query_state(sctx, query->b.type, -1);
+
+ if (query->b.type == PIPE_QUERY_PIPELINE_STATISTICS)
+ sctx->num_pipeline_stat_queries--;
}
static void emit_set_predicate(struct si_context *ctx,
if (!query)
return;
+ if (ctx->screen->use_ngg_streamout &&
+ (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+ query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)) {
+ assert(!"not implemented");
+ }
+
invert = ctx->render_cond_invert;
flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
/* Use the value written by compute shader as a workaround. Note that
* the wait flag does not apply in this predication mode.
*
- * The shader outputs the result value to L2. Workarounds only affect VI
+ * The shader outputs the result value to L2. Workarounds only affect GFX8
* and later, where the CP reads data from L2, so we don't need an
* additional flush.
*/
query_type != SI_QUERY_TIME_ELAPSED_SDMA))
return si_query_sw_create(query_type);
+ if (sscreen->use_ngg_streamout &&
+ (query_type == PIPE_QUERY_PRIMITIVES_EMITTED ||
+ query_type == PIPE_QUERY_PRIMITIVES_GENERATED ||
+ query_type == PIPE_QUERY_SO_STATISTICS ||
+ query_type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+ query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE))
+ return gfx10_sh_query_create(sscreen, query_type, index);
+
return si_query_hw_create(sscreen, query_type, index);
}
struct si_context *sctx = (struct si_context *)ctx;
struct si_query *squery = (struct si_query *)query;
- squery->ops->destroy(sctx->screen, squery);
+ squery->ops->destroy(sctx, squery);
}
-static boolean si_begin_query(struct pipe_context *ctx,
- struct pipe_query *query)
+static bool si_begin_query(struct pipe_context *ctx,
+ struct pipe_query *query)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_query *squery = (struct si_query *)query;
if (!query->buffer.buf)
return false;
- LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries);
+ list_addtail(&query->b.active_list, &sctx->active_queries);
sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
return true;
}
si_query_hw_emit_stop(sctx, query);
if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) {
- LIST_DELINIT(&query->b.active_list);
+ list_delinit(&query->b.active_list);
sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend;
}
.resume = si_query_hw_resume,
};
-static boolean si_get_query_result(struct pipe_context *ctx,
- struct pipe_query *query, boolean wait,
- union pipe_query_result *result)
+static bool si_get_query_result(struct pipe_context *ctx,
+ struct pipe_query *query, bool wait,
+ union pipe_query_result *result)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_query *squery = (struct si_query *)query;
static void si_get_query_result_resource(struct pipe_context *ctx,
struct pipe_query *query,
- boolean wait,
+ bool wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *resource,
return true;
}
-static void si_restore_qbo_state(struct si_context *sctx,
- struct si_qbo_state *st)
-{
- sctx->b.bind_compute_state(&sctx->b, st->saved_compute);
-
- sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
- pipe_resource_reference(&st->saved_const0.buffer, NULL);
-
- sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
- for (unsigned i = 0; i < 3; ++i)
- pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL);
-}
-
static void si_query_hw_get_result_resource(struct si_context *sctx,
struct si_query *squery,
bool wait,
si_resource(resource)->TC_L2_dirty = true;
}
- sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);
+ sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo,
+ 1 << 2);
if (wait && qbuf == &query->buffer) {
uint64_t va;
static void si_render_condition(struct pipe_context *ctx,
struct pipe_query *query,
- boolean condition,
+ bool condition,
enum pipe_render_cond_flag mode)
{
struct si_context *sctx = (struct si_context *)ctx;
if (query) {
bool needs_workaround = false;
- /* There was a firmware regression in VI which causes successive
+ /* There was a firmware regression in GFX8 which causes successive
* SET_PREDICATION packets to give the wrong answer for
* non-inverted stream overflow predication.
*/
- if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) ||
+ if (((sctx->chip_class == GFX8 && sctx->screen->info.pfp_fw_feature < 49) ||
(sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) &&
!condition &&
(squery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
static struct pipe_driver_query_info si_driver_query_list[] = {
X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
- X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE),
X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE),
X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),
+ X("live-shader-cache-hits", LIVE_SHADER_CACHE_HITS, UINT, CUMULATIVE),
+ X("live-shader-cache-misses", LIVE_SHADER_CACHE_MISSES, UINT, CUMULATIVE),
+ X("memory-shader-cache-hits", MEMORY_SHADER_CACHE_HITS, UINT, CUMULATIVE),
+ X("memory-shader-cache-misses", MEMORY_SHADER_CACHE_MISSES, UINT, CUMULATIVE),
+ X("disk-shader-cache-hits", DISK_SHADER_CACHE_HITS, UINT, CUMULATIVE),
+ X("disk-shader-cache-misses", DISK_SHADER_CACHE_MISSES, UINT, CUMULATIVE),
/* GPIN queries are for the benefit of old versions of GPUPerfStudio,
* which use them as a fallback path to detect the GPU type.
X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
+
+ X("pd-num-prims-accepted", PD_NUM_PRIMS_ACCEPTED, UINT64, AVERAGE),
+ X("pd-num-prims-rejected", PD_NUM_PRIMS_REJECTED, UINT64, AVERAGE),
+ X("pd-num-prims-ineligible", PD_NUM_PRIMS_INELIGIBLE,UINT64, AVERAGE),
};
#undef X
static unsigned si_get_num_queries(struct si_screen *sscreen)
{
/* amdgpu */
- if (sscreen->info.drm_major == 3) {
- if (sscreen->info.chip_class >= VI)
+ if (sscreen->info.is_amdgpu) {
+ if (sscreen->info.chip_class >= GFX8)
return ARRAY_SIZE(si_driver_query_list);
else
return ARRAY_SIZE(si_driver_query_list) - 7;
/* radeon */
if (sscreen->info.has_read_registers_query) {
- if (sscreen->info.chip_class == CIK)
+ if (sscreen->info.chip_class == GFX7)
return ARRAY_SIZE(si_driver_query_list) - 6;
else
return ARRAY_SIZE(si_driver_query_list) - 7;
sctx->b.end_query = si_end_query;
sctx->b.get_query_result = si_get_query_result;
sctx->b.get_query_result_resource = si_get_query_result_resource;
- sctx->atoms.s.render_cond.emit = si_emit_query_predication;
- if (((struct si_screen*)sctx->b.screen)->info.num_render_backends > 0)
- sctx->b.render_condition = si_render_condition;
+ if (sctx->has_graphics) {
+ sctx->atoms.s.render_cond.emit = si_emit_query_predication;
+ sctx->b.render_condition = si_render_condition;
+ }
- LIST_INITHEAD(&sctx->active_queries);
+ list_inithead(&sctx->active_queries);
}
void si_init_screen_query_functions(struct si_screen *sscreen)