+#if GEN_GEN >= 8
+ case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { /* KHR performance query; this arm is only compiled when GEN_GEN >= 8. */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { /* Stall emitted ahead of the perf-count report below. */
+ pc.CommandStreamerStallEnable = true;
+ pc.StallAtPixelScoreboard = true;
+ }
+ cmd_buffer->perf_query_pool = pool; /* Record on the command buffer which pool this perf query uses. */
+
+ /* The destination of the MI_REPORT_PERF_COUNT emitted further down is not
+ * a constant. It is built with the gen_mi helpers as:
+ *
+ *    canonical(pool BO offset + OA offset of this query for pass 0)
+ *       + 64-bit value of ANV_PERF_QUERY_OFFSET_REG
+ *
+ * and that sum is then stored over the packet's MemoryAddress field.
+ *
+ * We know the bottom bits of the address are 0s which match what we
+ * want in the MI_RPC packet.
+ */
+ struct gen_mi_value mi_rpc_write_offset =
+ gen_mi_iadd(
+ &b,
+ gen_mi_imm(
+ gen_canonical_address(
+ pool->bo->offset +
+ khr_perf_query_oa_offset(pool, query, 0 /* pass */, false))),
+ gen_mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
+ struct gen_mi_address_token mi_rpc_addr_dest =
+ gen_mi_store_address(&b, mi_rpc_write_offset); /* Store whose target is resolved later, once the packet location is known. */
+ gen_mi_self_mod_barrier(&b); /* NOTE(review): presumably ensures the address store lands before the
+ * packet below is consumed -- confirm against the gen_mi builder.
+ */
+
+ void *mi_rpc_dws =
+ anv_batch_emitn(&cmd_buffer->batch,
+ GENX(MI_REPORT_PERF_COUNT_length),
+ GENX(MI_REPORT_PERF_COUNT),
+ .MemoryAddress = query_addr /* Will be overwritten */ );
+ _gen_mi_resolve_address_token(&b, mi_rpc_addr_dest,
+ mi_rpc_dws +
+ GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8); /* Aim the pending store at the MemoryAddress field of the
+ * packet just emitted. NOTE(review): the "/ 8" presumably
+ * converts a bit position to a byte offset -- confirm.
+ */
+ break;
+ }
+#endif
+
+ case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: { /* INTEL performance query: stall, emit a perf-count report, then snapshot
+ * a few registers into the query slot at query_addr.
+ * NOTE(review): the `false` passed to the intel_perf_*() offset helpers
+ * presumably selects the begin-of-query half of the slot -- confirm.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { /* Stall emitted ahead of the report and register reads. */
+ pc.CommandStreamerStallEnable = true;
+ pc.StallAtPixelScoreboard = true;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_REPORT_PERF_COUNT), rpc) { /* Report written at query_addr + intel_perf_mi_rpc_offset(false). */
+ rpc.MemoryAddress =
+ anv_address_add(query_addr, intel_perf_mi_rpc_offset(false));
+ }
+#if GEN_GEN < 9 /* The 32-bit register saved at the rpstart offset is RPSTAT1 here, RPSTAT0 on GEN_GEN >= 9. */
+ gen_mi_store(&b,
+ gen_mi_mem32(anv_address_add(query_addr,
+ intel_perf_rpstart_offset(false))),
+ gen_mi_reg32(GENX(RPSTAT1_num)));
+#else
+ gen_mi_store(&b,
+ gen_mi_mem32(anv_address_add(query_addr,
+ intel_perf_rpstart_offset(false))),
+ gen_mi_reg32(GENX(RPSTAT0_num)));
+#endif
+#if GEN_GEN >= 8 && GEN_GEN <= 11 /* PERFCNT1/PERFCNT2 are only captured for GEN_GEN 8 through 11. */
+ gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr,
+ intel_perf_counter(false))),
+ gen_mi_reg64(GENX(PERFCNT1_num))); /* First 64-bit counter, at the counter offset. */
+ gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr,
+ intel_perf_counter(false) + 8)),
+ gen_mi_reg64(GENX(PERFCNT2_num))); /* Second 64-bit counter, 8 bytes after the first. */
+#endif
+ break;
+ }
+