+ assert(ctx->num_cs_dw_queries_suspend == 0);
+
+ /* Check CS space here. Resuming must not be interrupted by flushes. */
+ ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, true);
+
+ LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
+ r600_query_hw_emit_start(ctx, query);
+ }
+}
+
+/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
+{
+ struct r600_common_context *ctx =
+ (struct r600_common_context*)rscreen->aux_context;
+ struct radeon_cmdbuf *cs = ctx->gfx.cs;
+ struct r600_resource *buffer;
+ uint32_t *results;
+ unsigned i, mask = 0;
+ unsigned max_rbs;
+
+ if (ctx->family == CHIP_JUNIPER) {
+ /*
+ * Fix for predication lockups - the chip can only ever have
+ * 4 RBs, however it looks like the predication logic assumes
+ * there's 8, trying to read results from query buffers never
+ * written to. By increasing this number we'll write the
+ * status bit for these as per the normal disabled rb logic.
+ */
+ ctx->screen->info.num_render_backends = 8;
+ }
+ max_rbs = ctx->screen->info.num_render_backends;
+
+ assert(rscreen->chip_class <= CAYMAN);
+
+ /*
+ * if backend_map query is supported by the kernel.
+ * Note the kernel drm driver for a long time never filled in the
+ * associated data on eg/cm, only on r600/r700, hence ignore the valid
+ * bit there if the map is zero.
+ * (Albeit some chips with just one active rb can have a valid 0 map.)
+ */
+ if (rscreen->info.r600_gb_backend_map_valid &&
+ (ctx->chip_class < EVERGREEN || rscreen->info.r600_gb_backend_map != 0)) {
+ unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
+ unsigned backend_map = rscreen->info.r600_gb_backend_map;
+ unsigned item_width, item_mask;
+
+ if (ctx->chip_class >= EVERGREEN) {
+ item_width = 4;
+ item_mask = 0x7;
+ } else {
+ item_width = 2;
+ item_mask = 0x3;
+ }
+
+ while (num_tile_pipes--) {
+ i = backend_map & item_mask;
+ mask |= (1<<i);
+ backend_map >>= item_width;
+ }
+ if (mask != 0) {
+ rscreen->info.enabled_rb_mask = mask;
+ return;
+ }
+ }
+
+ /* otherwise backup path for older kernels */
+
+ /* create buffer for event data */
+ buffer = (struct r600_resource*)
+ pipe_buffer_create(ctx->b.screen, 0,
+ PIPE_USAGE_STAGING, max_rbs * 16);
+ if (!buffer)
+ return;
+
+ /* initialize buffer with zeroes */
+ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
+ if (results) {
+ memset(results, 0, max_rbs * 4 * 4);
+
+ /* emit EVENT_WRITE for ZPASS_DONE */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, buffer->gpu_address);
+ radeon_emit(cs, buffer->gpu_address >> 32);
+
+ r600_emit_reloc(ctx, &ctx->gfx, buffer,
+ RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
+
+ /* analyze results */
+ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
+ if (results) {
+ for(i = 0; i < max_rbs; i++) {
+ /* at least highest bit will be set if backend is used */
+ if (results[i*4 + 1])
+ mask |= (1<<i);
+ }
+ }
+ }
+
+ r600_resource_reference(&buffer, NULL);
+
+ if (mask) {
+ if (rscreen->debug_flags & DBG_INFO &&
+ mask != rscreen->info.enabled_rb_mask) {
+ printf("enabled_rb_mask (fixed) = 0x%x\n", mask);
+ }
+ rscreen->info.enabled_rb_mask = mask;
+ }
+}
+
+#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
+ { \
+ .name = name_, \
+ .query_type = R600_QUERY_##query_type_, \
+ .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+ .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+ .group_id = group_id_ \
+ }
+
+#define X(name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
+
+#define XG(group_, name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
+
+static const struct pipe_driver_query_info r600_driver_query_list[] = {
+ X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
+ X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
+ X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
+ X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
+ X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE),
+ X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE),
+ X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE),
+ X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
+ X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
+ X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
+ X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
+ X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
+ X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
+ X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
+ X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
+ X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE),
+ X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE),
+ X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE),
+ X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE),
+ X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE),
+ X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE),
+ X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE),
+ X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE),
+ X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+ X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
+ X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
+ X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
+ X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
+ X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
+ X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
+ X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
+ X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE),
+ X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
+ X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
+ X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
+ X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+ X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
+ X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+
+ /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
+ * which use it as a fallback path to detect the GPU type.
+ *
+ * Note: The names of these queries are significant for GPUPerfStudio
+ * (and possibly their order as well). */
+ XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
+ XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
+ XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
+ XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
+ XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
+
+ X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
+ X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
+ X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
+
+ /* The following queries must be at the end of the list because their
+ * availability is adjusted dynamically based on the DRM version. */
+ X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
+ X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
+ X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE),
+ X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE),
+ X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE),
+ X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE),
+ X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE),
+ X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE),
+ X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE),
+ X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE),
+ X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE),
+ X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE),
+ X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE),
+ X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE),
+ X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE),
+ X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE),
+ X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
+ X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
+ X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
+ X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
+ X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
+};
+
+#undef X
+#undef XG
+#undef XFULL
+
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+ if (rscreen->info.drm_minor >= 42)
+ return ARRAY_SIZE(r600_driver_query_list);
+ else
+ return ARRAY_SIZE(r600_driver_query_list) - 25;
+}
+
+static int r600_get_driver_query_info(struct pipe_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_info *info)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ unsigned num_queries = r600_get_num_queries(rscreen);
+
+ if (!info) {
+ unsigned num_perfcounters =
+ r600_get_perfcounter_info(rscreen, 0, NULL);
+
+ return num_queries + num_perfcounters;
+ }
+
+ if (index >= num_queries)
+ return r600_get_perfcounter_info(rscreen, index - num_queries, info);
+
+ *info = r600_driver_query_list[index];
+
+ switch (info->query_type) {
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_MAPPED_VRAM:
+ info->max_value.u64 = rscreen->info.vram_size;
+ break;
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_GTT_USAGE:
+ case R600_QUERY_MAPPED_GTT:
+ info->max_value.u64 = rscreen->info.gart_size;
+ break;
+ case R600_QUERY_GPU_TEMPERATURE:
+ info->max_value.u64 = 125;
+ break;
+ case R600_QUERY_VRAM_VIS_USAGE:
+ info->max_value.u64 = rscreen->info.vram_vis_size;
+ break;
+ }
+
+ if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
+ info->group_id += rscreen->perfcounters->num_groups;
+
+ return 1;
+}
+
+/* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
+ * performance counter groups, so be careful when changing this and related
+ * functions.
+ */
+static int r600_get_driver_query_group_info(struct pipe_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_group_info *info)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ unsigned num_pc_groups = 0;
+
+ if (rscreen->perfcounters)
+ num_pc_groups = rscreen->perfcounters->num_groups;
+
+ if (!info)
+ return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
+
+ if (index < num_pc_groups)
+ return r600_get_perfcounter_group_info(rscreen, index, info);
+
+ index -= num_pc_groups;
+ if (index >= R600_NUM_SW_QUERY_GROUPS)
+ return 0;
+
+ info->name = "GPIN";
+ info->max_active_queries = 5;
+ info->num_queries = 5;
+ return 1;
+}
+
+void r600_query_init(struct r600_common_context *rctx)
+{
+ rctx->b.create_query = r600_create_query;
+ rctx->b.create_batch_query = r600_create_batch_query;
+ rctx->b.destroy_query = r600_destroy_query;
+ rctx->b.begin_query = r600_begin_query;
+ rctx->b.end_query = r600_end_query;
+ rctx->b.get_query_result = r600_get_query_result;
+ rctx->b.get_query_result_resource = r600_get_query_result_resource;
+ rctx->render_cond_atom.emit = r600_emit_query_predication;
+
+ if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
+ rctx->b.render_condition = r600_render_condition;
+
+ LIST_INITHEAD(&rctx->active_queries);
+}
+
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
+{
+ rscreen->b.get_driver_query_info = r600_get_driver_query_info;
+ rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;